home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
- /* bgopal: (1993-4) redesigned/rewritten using agrep's library interface */
- #include <sys/param.h>
- #include <errno.h>
- #include "glimpse.h"
- #include "defs.h"
- #include <fcntl.h>
- #include "checkfile.h"
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <sys/time.h>
-
- #define CLIENTSERVER 1
- #define USE_MSGHDR 0
- #define USE_UNIXDOMAIN 0
- #define DEBUG 0
-
- #define DEF_SERV_PORT 2001
- #define MIN_SERV_PORT 1024
- #define MAX_SERV_PORT 30000
- #define SERVER_QUEUE_SIZE 10 /* number of requests to buffer up while processing one request = 5 */
- #if CLIENTSERVER
- #include <sys/socket.h>
- #include <sys/un.h>
- #include <netinet/in.h>
- #include <arpa/inet.h>
- #include <netdb.h>
- /* #include <sys/uio.h> */
- /* #include <sgtty.h> */
- #include <signal.h>
- #if defined(_IBMR2)
- #include <sys/select.h>
- #endif
- #endif /*CLIENTSERVER*/
-
- /* Borrowed from C-Lib */
- extern char **environ;
- extern int errno;
-
- /* For client-server protocol */
- CHAR SERV_HOST[MAXNAME];
- int SERV_PORT;
- char glimpse_reqbuf[MAX_ARGS*MAX_NAME_LEN];
- extern int glimpse_clientdied; /* set if signal received about dead socket: need agrep variable so that exec() can return quickly */
-
- /* Borrowed from agrep.c */
- extern int D_length; /* global variable in agrep */
- extern int D; /* global variable in agrep */
- extern int pattern_index;
- extern int REGEX, WORDBOUND; /* To catch -w + REGEX error message in glimpse */
- /* These are used for byte level index search */
- extern CHAR CurrentFileName[MAX_LINE_LEN];
- extern int SetCurrentFileName;
- extern int CurrentByteOffset;
- extern int SetCurrentByteOffset;
- extern int execfd;
- extern int agrep_initialfd;
- extern CHAR *agrep_inbuffer;
- extern int agrep_inlen;
- extern int agrep_inpointer;
- extern FILE *agrep_finalfp;
- extern CHAR *agrep_outbuffer;
- extern int agrep_outlen;
- extern int agrep_outpointer;
- extern int glimpse_call; /* prevent agrep from printing out its usage */
- extern int glimpse_isserver; /* prevent agrep from asking for user input */
- extern int use_previous_state; /* speed up byte-level-search within one file */
- int first_search = 1; /* intra/interaction in process_query() and glimpse_search() */
-
- /* Borrowed from build_in.c */
- extern int OneFilePerBlock;
- extern int StructuredIndex;
- extern unsigned int dest_index_set[REAL_PARTITION];
- extern unsigned char dest_index_buf[REAL_INDEX_BUF];
- extern unsigned int src_index_set[REAL_PARTITION];
- extern unsigned char src_index_buf[REAL_INDEX_BUF];
- extern int mask_int[32];
- extern int indexable_char[256];
- int test_indexable_char[256];
- extern int p_table[MAX_PARTITION];
- extern int GMAX_WORD_SIZE;
- extern int IndexNumber; /* used in getword() */
- extern int InterpretSpecial; /* used to "not-split" agrep-regexps */
- extern int UseFilters; /* defined in build_in.c, used for filtering routines in io.c */
- extern int ByteLevelIndex;
-
- /* OPTIONS/FLAGS */
- int CONTACT_SERVER = 0; /* Should client try to call server at all or just process query on its own? */
- int NOBYTELEVEL = 0; /* Some cases where we cannot do byte level fast-search: ALWAYS 0 if !ByteLevelIndex */
- int OPTIMIZEBYTELEVEL = 0; /* Some cases where we don't want to do byte level search since number of files is small */
- int GLIMITOUTPUT = 0; /* max no. of output lines: 0=>infinity=default=nolimit */
- int GBESTMATCH = 0; /* Should I change -B to -# where # = no. of errors? */
- int GRECURSIVE = 0;
- int GNOPROMPT = 0;
- int GOUTTAIL = 0;
- int GFILENAMEONLY = 0; /* how to do it if it is an and expression in structured queries */
- int GNOFILENAME=0;
- int MATCHFILE = 0;
- int PRINTATTR = 0;
- int Pat_as_is=0;
- int Only_first=0;
- int WHOLEFILESCOPE=0; /* used only when foundattr is NOT set: otherwise, scope is whole file anyway */
- int foundattr=0; /* set in split.c -- != 0 only when StructuredIndex AND query is structured */
-
- /* structured queries */
- CHAR ***attr_vals; /* matrix of char pointers: row=max #of attributes, col=max possible values */
- CHAR **attr_found; /* did the expression corr. to each value in attr_vals match? */
- ParseTree *GParse; /* what kind of expression corr. to attr are we looking for */
-
- /* arbitrary booleans */
- ParseTree terminals[MAXNUM_PAT]; /* parse tree's terminal node pointers pt. to elements of this array; also used outside */
- char matched_terminals[MAXNUM_PAT]; /* ...[i] is 1 if i'th terminal matched: used in filter_output and eval_tree */
- int num_terminals; /* number of terminal patterns */
- int ComplexBoolean=0; /* 1 if we need to use parse trees and the eval function */
-
- /* index search */
- CHAR *pat_list[MAXNUM_PAT]; /* complete words within global pattern */
- int pat_lens[MAXNUM_PAT]; /* their lengths */
- int pat_attr[MAXNUM_PAT]; /* set of attributes */
- int is_mgrep_pat[MAXNUM_PAT];
- int mgrep_pat_index[MAXNUM_PAT];
- int num_mgrep_pat;
- CHAR pat_buf[(MAXNUM_PAT + 2)*MAXPAT];
- int pat_ptr = 0;
- extern char INDEX_DIR[MAX_LINE_LEN];
- char TEMP_DIR[MAX_LINE_LEN];
- char indexnumberbuf[256]; /* to read in first few lines of the index */
- char *index_argv[MAX_ARGS];
- int index_argc = 0;
- int bestmatcherrors=0; /* set during index search, used later on */
- int patindex;
- int patbufpos = -1;
- char tempfile[MAX_NAME_LEN];
-
- /* agrep search */
- char *agrep_argv[MAX_ARGS];
- int agrep_argc = 0;
- CHAR *FileOpt; /* the option list after -F */
- int fileopt_length;
- CHAR GPattern[MAXPAT];
- int GM;
- CHAR APattern[MAXPAT];
- int AM;
- CHAR GD_pattern[MAXPAT];
- int GD_length;
- CHAR **GTextfiles;
- int GFileIndex[MAXNUM_FILE];
- int GNumfiles;
- int GNumpartitions;
- CHAR GProgname[MAXNAME];
-
- /* persistent file descriptors */
- #if BG_DEBUG
- FILE *debug; /* file descriptor for debugging output */
- #endif /*BG_DEBUG*/
- FILE *indexfp = NULL; /* glimpse index */
- FILE *partfp = NULL; /* glimpse partitions */
- FILE *nullfp = NULL; /* to discard output: agrep -s doesn't work properly */
- int svstdin = 0, svstdout = 1, svstderr = 2;
-
- /* Index manipulation */
- struct offsets **src_offset_table;
- struct offsets **multi_dest_offset_table[MAXNUM_PAT];
- unsigned int multi_dest_index_set[MAXNUM_PAT][REAL_PARTITION];
- extern free_list();
- struct stat index_stat_buf, file_stat_buf;
-
- /* Direct agrep access for bytelevel-indices */
- extern int COUNT, INVERSE, TCOMPRESSED, NOFILENAME, POST_FILTER, OUTTAIL, BYTECOUNT,
- LIMITOUTPUT, DELIMITER, SILENT, FILENAMEONLY, num_of_matched, prev_num_of_matched, FILEOUT;
- CHAR matched_region[MAX_REGION_LIMIT*2 + MAXPATT*2];
- int RegionLimit=DEFAULT_REGION_LIMIT;
-
- /* Returns number of matched records/lines. Uses agrep's options to output stuff nicely */
- int
- glimpse_search(AM, APattern, GD_length, GD_pattern, filename, fileindex, src_offset_table, outfp)
- int AM;
- unsigned char APattern[];
- int GD_length;
- unsigned char GD_pattern[];
- char *filename;
- int fileindex;
- struct offsets *src_offset_table[];
- FILE *outfp;
- {
- FILE *infp;
- char sig[SIGNATURE_LEN];
- struct offsets **p1, *tp1;
- CHAR *text, *curtextend, *curtextbegin;
- int times;
- int num, ret, totalret = 0;
- int prevoffset, begininterval = 0, endinterval = -1;
- CHAR *beginpageptr, *endpageptr;
- int beginpage = 0, endpage = -1;
- static int MAXTIMES, MAXPGTIMES, pagesize;
- static int first_time = 1;
-
- /*
- * If can't open file for read, quit
- * For each offset for that file:
- * seek to that point
- * go back until delimiter, go forward until delimiter, output it: MAX_REGION_LIMIT is 16K on either side.
- * read in units of RegionLimit
- * before outputting matched record, use options to put prefixes (or use memagrep which does everything?)
- */
-
- if (first_time) {
- pagesize = DISKBLOCKSIZE;
- MAXTIMES = ((MAX_REGION_LIMIT / RegionLimit) > 1) ? (MAX_REGION_LIMIT / RegionLimit) : 1;
- MAXPGTIMES = ((MAX_REGION_LIMIT / pagesize) > 1) ? (MAX_REGION_LIMIT / pagesize) : 1;
- first_time = 0;
- }
- /* Safety: must end/begin with delim */
- memcpy(matched_region, GD_pattern, GD_length);
- memcpy(matched_region+MAXPATT+2*MAX_REGION_LIMIT, GD_pattern, GD_length);
- text = &matched_region[MAX_REGION_LIMIT+MAXPATT];
-
- if ((infp = fopen(filename, "r")) == NULL) return 0;
- #if 0
- /* Cannot search in .CZ files since offset computations will be incorrect */
- TCOMPRESSED = ON;
- if (!tuncompressible_filename(file_list[i], strlen(file_list[i]))) TCOMPRESSED = OFF;
- num_read = fread(sig, 1, SIGNATURE_LEN, infp);
- if ((TCOMPRESSED == ON) && tuncompressible(sig, num_read)) {
- EASYSEARCH = sig[SIGNATURE_LEN-1];
- if (!EASYSEARCH) {
- fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); /* not filename!!! */
- }
- }
- else TCOMPRESSED = OFF;
- #endif /*0*/
-
- p1 = &src_offset_table[fileindex];
- while (*p1 != NULL) {
- if ( (begininterval <= (*p1)->offset) && (endinterval >= (*p1)->offset) ) { /* already covered this area */
- #if DEBUG
- printf("ignoring %d in [%d,%d]\n", (*p1)->offset, begininterval, endinterval);
- #endif /*DEBUG*/
- tp1 = *p1;
- *p1 = (*p1)->next;
- my_free(tp1, sizeof(struct offsets));
- continue;
- }
-
- TCOMPRESSED = OFF;
- #if 0
- if ( (beginpage <= (*p1)->offset) && (endpage >= (*p1)->offset) ) { /* already read this area */
- text += (*p1)->offset - prevoffset;
- times = 0;
- while (times < MAXPGTIMES) {
- if ( ((curtextend = forward_delimiter(text, endpageptr, GD_pattern, GD_length, 1)) < endpageptr) ||
- (endpageptr >= &matched_region[MAX_REGION_LIMIT*2+MAXPATT]) ) break;
- times ++;
- fseek(infp, endpage, 0);
- num = (&matched_region[MAX_REGION_LIMIT*2+MAXPATT] - endpageptr < pagesize) ? (&matched_region[MAX_REGION_LIMIT*2+MAXPATT*2] - endpageptr) : pagesize;
- if ((num = fread(endpageptr, 1, num, infp)) <= 0) break;
- endpage += num;
- endpageptr += num;
- if (endpageptr <= text) {
- curtextend = text; /* error in value of offset: file was modified and offsets no longer true: your RISK! */
- break;
- }
- }
- times = 0;
- while (times < MAXPGTIMES) {
- if ((curtextbegin = backward_delimiter(text, beginpageptr, GD_pattern, GD_length, 0)) > beginpageptr) break;
- if (beginpage > 0) {
- if (beginpageptr - pagesize < &matched_region[MAXPATT]) {
- if ((num = beginpageptr - &matched_region[MAXPATT]) <= 0) break;
- }
- else num = pagesize;
- beginpage -= num;
- beginpageptr -= num;
- }
- else break;
- times ++;
- fseek(infp, beginpage, 0);
- fread(beginpageptr, 1, num, infp);
- }
- }
- else {
- prevoffset = (*p1)->offset;
- text = &matched_region[MAX_REGION_LIMIT+MAXPATT];
- endpage = beginpage = ((*p1)->offset / pagesize) * pagesize;
- /* endpage = (((*p1)->offset + pagesize) / pagesize) * pagesize */
- endpageptr = beginpageptr = text - ((*p1)->offset - beginpage);
- /* endpageptr = text + (endpage - (*p1)->offset); */
- curtextbegin = curtextend = text;
- times = 0;
- while (times < MAXPGTIMES) {
- fseek(infp, endpage, 0);
- num = (&matched_region[MAX_REGION_LIMIT*2+MAXPATT] - endpageptr < pagesize) ? (&matched_region[MAX_REGION_LIMIT*2+MAXPATT*2] - endpageptr) : pagesize;
- if ((num = fread(endpageptr, 1, num, infp)) <= 0) break;
- endpage += num;
- endpageptr += num;
- if (endpageptr <= text) {
- curtextend = text; /* error in value of offset: file was modified and offsets no longer true: your RISK! */
- break;
- }
- if (((curtextend = forward_delimiter(text, endpageptr, GD_pattern, GD_length, 1)) < endpageptr) ||
- (endpageptr >= &matched_region[MAX_REGION_LIMIT + 2*MAXPATT])) break;
- times ++;
- }
- times = 0;
- while (times < MAXPGTIMES) { /* I have already read the initial page since endpage is beginpage initially */
- if ((curtextbegin = backward_delimiter(text, beginpageptr, GD_pattern, GD_length, 0)) > beginpageptr) break;
- if (beginpage > 0) {
- if (beginpageptr - pagesize < &matched_region[MAXPATT]) {
- if ((num = beginpageptr - &matched_region[MAXPATT]) <= 0) break;
- }
- else num = pagesize;
- beginpage -= num;
- beginpageptr -= num;
- }
- else break;
- times ++;
- fseek(infp, beginpage, 0);
- fread(beginpageptr, 1, num, infp);
- }
- }
- #else /*0*/
- /* Find forward delimiter (including delimiter) */
- times = 0;
- fseek(infp, (*p1)->offset, 0);
- while (times < MAXTIMES) {
- if ((num = fread(text+RegionLimit*times, 1, RegionLimit, infp)) > 0)
- curtextend = forward_delimiter(text, text+RegionLimit*times+num, GD_pattern, GD_length, 1);
- if ((curtextend < text+RegionLimit*times+num) || (num < RegionLimit)) break;
- times ++;
- }
- /* Find backward delimiter (including delimiter) */
- times = 0;
- while (times < MAXTIMES) {
- num = ((*p1)->offset - RegionLimit*(times+1)) > 0 ? ((*p1)->offset - RegionLimit*(times+1)) : 0;
- fseek(infp, num, 0);
- if (num > 0) {
- fread(text-RegionLimit*(times+1), 1, RegionLimit, infp);
- curtextbegin = backward_delimiter(text, text-RegionLimit*(times+1), GD_pattern, GD_length, 0);
- }
- else {
- fread(text-RegionLimit*times-(*p1)->offset, 1, (*p1)->offset, infp);
- curtextbegin = backward_delimiter(text, text-RegionLimit*times-(*p1)->offset, GD_pattern, GD_length, 0);
- }
- if ((num <= 0) || (curtextbegin > text-RegionLimit*(times+1))) break;
- times ++;
- }
- #endif /*0*/
-
- /* set interval and delete the entry */
- begininterval = (*p1)->offset - (text - curtextbegin);
- endinterval = (*p1)->offset + (curtextend - text);
- #if DEBUG
- *(curtextend + 1) = '\0';
- printf("%s [%d < %d < %d] = %s\n", CurrentFileName, begininterval, (*p1)->offset, endinterval, curtextbegin);
- #endif /*DEBUG*/
- tp1 = *p1;
- *p1 = (*p1)->next;
- my_free(tp1, sizeof(struct offsets));
- if (curtextend <= curtextbegin) continue; /* error in offsets/delims */
-
- /*
- * Don't call memagrep since that is heavy weight. Call exec
- * directly after doing agrep_search()'s preprocessing here.
- */
- CurrentByteOffset = begininterval+1;
- SetCurrentByteOffset = 1;
- use_previous_state = 1;
- first_search = 1;
- if (first_search) {
- if ((ret = memagrep_search(AM, APattern, curtextend-curtextbegin, curtextbegin, 0, outfp)) > 0)
- totalret ++; /* += ret */
- else if ((ret < 0) && REGEX && (WORDBOUND || DELIMITER)) {
- fclose(infp);
- return -1;
- }
- first_search = 0;
- }
- else { /* All agrep globals are properly set */
- agrep_finalfp = (FILE *)outfp;
- agrep_outlen = 0;
- agrep_outbuffer = NULL;
- agrep_outpointer = 0;
- execfd = agrep_initialfd = -1;
- agrep_inbuffer = curtextbegin;
- agrep_inlen = curtextend - curtextbegin;
- agrep_inpointer = 0;
- if ((ret = exec(-1, NULL)) > 0)
- totalret ++; /* += ret; */
- else if ((ret < 0) && REGEX && (WORDBOUND || DELIMITER)) {
- fclose(infp);
- return -1;
- }
- }
-
- if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= totalret)) break; /* done */
- if ((totalret > 0) && FILENAMEONLY) break;
- }
-
- SetCurrentByteOffset = 0;
- fclose(infp);
- if (totalret > 0) { /* dirty solution: must handle part of agrep here */
- if (COUNT && !FILEOUT) {
- if(!NOFILENAME) fprintf(outfp, "%s: %d\n", CurrentFileName, totalret);
- else fprintf(outfp, "%d\n", totalret);
- }
- if (FILEOUT) file_out(CurrentFileName);
- }
- return totalret;
- }
-
- #if CLIENTSERVER
/*
 * mystrlen: length of str, but never looking at more than max characters.
 * Returns the index of the first '\0', or max if none occurs before it
 * (0 when max is not positive).
 */
int
mystrlen(str, max)
char	*str;
int	max;
{
	int	n;

	for (n = 0; n < max; n++) {
		if (str[n] == '\0') break;
	}
	return n;
}
-
/*
 * readn: read up to nbytes bytes from fd into ptr, calling read() again
 * after every partial read.
 *
 * Returns the number of bytes actually stored, which is smaller than nbytes
 * only if end-of-file was reached first, or the negative value returned by
 * read() if it fails.
 */
int
readn(fd, ptr, nbytes)
int	fd;
char	*ptr;
int	nbytes;
{
	int	nleft, nread;

	nleft = nbytes;
	while (nleft > 0) {
		nread = read(fd, ptr, nleft);
		if (nread < 0) return(nread);
		else if (nread == 0) break;	/* EOF */
		nleft -= nread;
		ptr += nread;
	}
	return (nbytes - nleft);
}
-
/*
 * writen: write nbytes bytes from ptr to fd, calling write() again after
 * every partial write.
 *
 * Returns nbytes when everything was written; if write() returns zero or a
 * negative value, that value is returned immediately.
 */
int
writen(fd, ptr, nbytes)
int	fd;
char	*ptr;
int	nbytes;
{
	int	nleft, nwritten;

	nleft = nbytes;
	while (nleft > 0) {
		nwritten = write(fd, ptr, nleft);
		if (nwritten <= 0) return nwritten;
		nleft -= nwritten;
		ptr += nwritten;
	}
	return (nbytes - nleft);
}
-
/*
 * readline: read one '\n'-terminated line from sockfd, a byte at a time,
 * storing at most maxlen-1 characters (the newline included) followed by
 * a terminating 0 in ptr.
 *
 * Returns 0 if end-of-file is seen before any character was read, -1 if
 * readn() reports an error, and otherwise the value of the 1-based
 * character counter at the point where reading stopped.
 */
int
readline(sockfd, ptr, maxlen)
int	sockfd;
char	*ptr;
int	maxlen;
{
	int	count = 1;
	int	status;
	char	ch;

	while (count < maxlen) {
		status = readn(sockfd, &ch, 1);
		if (status == 0) {
			if (count == 1) return (0);	/* EOF, nothing read */
			break;				/* EOF after some data */
		}
		if (status != 1) return (-1);
		*ptr++ = ch;
		if (ch == '\n') break;
		count++;
	}
	*ptr = 0;
	return count;
}
-
- #if USE_MSGHDR
/*
 * This piece of code was causing compilation problems.
 * It was not being used anyway, so it has been disabled: it is
 * compiled only when USE_MSGHDR is set to 1 (it is defined as 0 above).
 * -bg, Jan 4th 95
 */
-
- int
- sendfile(sockfd, fds, num)
- int sockfd, fds[], num;
- {
- struct iovec iov[1];
- struct msghdr msg;
- int ret;
-
- iov[0].iov_base = (char *) NULL;
- iov[0].iov_len = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (caddr_t) NULL;
- msg.msg_namelen = 0;
- msg.msg_accrights = (caddr_t) fds;
- msg.msg_accrightslen = num * sizeof(int);
-
- errno = 0;
- if ((ret = sendmsg(sockfd, &msg, 0)) < 0) {
- #if DEBUG
- printf("sendmsg ret = %x, errno = %d\n", ret, errno);
- #endif /*DEBUG*/
- return (-1);
- }
- #if DEBUG
- printf("sent fds %x %x %x, ret = %x, errno = %d\n", fds[0], fds[1], fds[2], ret, errno);
- #endif /*DEBUG*/
- return (0);
- }
-
/*
 * send_clfds: send the client's stdin, stdout and stderr descriptors
 * (in that order) over sockfd using sendfile().
 * Returns 0 on success, -1 on failure.
 */
int
send_clfds(sockfd, clstdin, clstdout, clstderr)
int	sockfd, clstdin, clstdout, clstderr;
{
	int	triple[3];

	triple[0] = clstdin;
	triple[1] = clstdout;
	triple[2] = clstderr;
	return (sendfile(sockfd, triple, 3) < 0) ? -1 : 0;
}
-
- int
- getfile(sockfd, fds, num)
- int sockfd, fds[], num;
- {
- struct iovec iov[1];
- struct msghdr msg;
- int ret;
-
- iov[0].iov_base = (char *) NULL;
- iov[0].iov_len = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (caddr_t) NULL;
- msg.msg_namelen = 0;
- msg.msg_accrights = (caddr_t)fds;
- msg.msg_accrightslen = num*sizeof(int);
-
- errno = 0;
- if ((ret = recvmsg(sockfd, &msg, 0)) < 0) {
- #if DEBUG
- printf("bad recvmsg: ret = %x, errno = %d\n", ret, errno);
- #endif /*DEBUG*/
- return -1;
- }
- #if DEBUG
- printf("got fds %x %x %x, ret = %x, errno = %d\n", fds[0], fds[1], fds[2], ret, errno);
- #endif /*DEBUG*/
- return 0;
- }
-
/*
 * get_clfds: receive three descriptors with getfile() and store them, in
 * order, through pclstdin, pclstdout and pclstderr.
 * Each value must lie in [0, 20); checking stops at the first value that
 * does not, so later output parameters are left unassigned in that case.
 * Returns 0 on success, -1 on a receive error or an out-of-range value.
 */
int
get_clfds(sockfd, pclstdin, pclstdout, pclstderr)
int	sockfd, *pclstdin, *pclstdout, *pclstderr;
{
	int	got[3];

	if (getfile(sockfd, got, 3) < 0) return -1;

	*pclstdin = got[0];
	if ((*pclstdin < 0) || (*pclstdin >= 20)) return -1;

	*pclstdout = got[1];
	if ((*pclstdout < 0) || (*pclstdout >= 20)) return -1;

	*pclstderr = got[2];
	if ((*pclstderr < 0) || (*pclstderr >= 20)) return -1;

	return 0;
}
- #endif /*USE_MSGHDR*/
-
/*
 * linearize: encode a request (pid, argc, argv[0..argc-1]).
 *
 * The pid and argc are each encoded as 4 bytes, most significant byte first.
 * If sockfd >= 0 the request is written to it: the two 4-byte words, then
 * every argument with its terminating '\0' followed by a '\n'.
 * If reqbuf != NULL the request is also stored in reqbuf (capacity reqlen):
 * the two 4-byte words, then every argument followed by two '\0' bytes.
 *
 * Returns the number of bytes stored in reqbuf (0 when reqbuf is NULL),
 * or -1 if a write falls short or reqbuf is too small.
 */
int
linearize(sockfd, reqbuf, reqlen, argc, argv, pid)
int	sockfd;
int	reqlen, argc;
char	*reqbuf, *argv[];
int	pid;
{
	unsigned char	word[4];
	int		header[2];
	int		used = 0;	/* bytes placed in reqbuf so far */
	int		h, k, arglen;

	/* the two header words go out in this order: pid, then argc */
	header[0] = pid;
	header[1] = argc;
	for (h = 0; h < 2; h++) {
		word[0] = (header[h] & 0xff000000) >> 24;
		word[1] = (header[h] & 0xff0000) >> 16;
		word[2] = (header[h] & 0xff00) >> 8;
		word[3] = (header[h] & 0xff);
		if ((sockfd >= 0) && (writen(sockfd, word, 4) < 4)) return -1;
		if (reqbuf != NULL) {
			if (used + 4 >= reqlen) return -1;
			memcpy(reqbuf+used, word, 4);
			used += 4;
		}
	}

	for (k = 0; k < argc; k++) {
		arglen = strlen(argv[k]);
		if (sockfd >= 0) {
			if (writen(sockfd, argv[k], arglen + 1) < arglen + 1) return -1;
			if (writen(sockfd, "\n", 1) < 1) return -1;	/* so that we can do gets */
		}
		if (reqbuf != NULL) {
			if (used + arglen + 2 >= reqlen) return -1;
			strcpy(reqbuf+used, argv[k]);
			used += arglen+1;
			reqbuf[used++] = '\0';	/* so that we can do strcpy */
		}
#if	DEBUG
		printf("sending %s\n", argv[k]);
#endif	/*DEBUG*/
	}
	return used;
}
-
- int
- delinearize(sockfd, reqbuf, reqlen, pargc, pargv, ppid)
- int sockfd;
- int reqlen, *pargc;
- char *reqbuf, **pargv[];
- int *ppid;
- {
- int i;
- char line[MAXLINE];
- int len;
- int ptr = 0;
- unsigned char array[4];
-
- *ppid = 0;
- *pargc = 0;
- *pargv = NULL;
- memset(array, '\0', 4);
-
- if (sockfd >= 0) if (readn(sockfd, array, 4) != 4) return -1;
- if (reqbuf != NULL) {
- if (ptr+4 >= reqlen) return -1;
- memcpy(array, reqbuf+ptr, 4);
- ptr += 4;
- }
- *ppid = (array[0] << 24) + (array[1] << 16) + (array[2] << 8) + array[3];
-
- memset(array, '\0', 4);
- if (sockfd >= 0) if (readn(sockfd, array, 4) != 4) return -1;
- if (reqbuf != NULL) {
- if (ptr+4 >= reqlen) return -1;
- memcpy(array, reqbuf+ptr, 4);
- ptr += 4;
- }
- *pargc = (array[0] << 24) + (array[1] << 16) + (array[2] << 8) + array[3];
- #if DEBUG
- printf("clargc=%x\n", *pargc);
- #endif /*DEBUG*/
- /* VERY important, set hard-coded limit to MAX_ARGS*MAX_NAME_LEN; otherwise can cause the server to allocate TONS of memory */
- if (*pargc <= 0 || *pargc >= (MAX_ARGS*MAX_NAME_LEN)) { *pargc = 0; return -1; }
-
- if ((*pargv = (char **)my_malloc(sizeof(char *) * *pargc)) == NULL) {
- /* no memory, so discard */
- *pargc = 0;
- return - 1;
- }
- memset(*pargv, '\0', sizeof(char *) * *pargc);
- for (i=0; i<*pargc; i++) {
- if (sockfd >= 0) {
- if (readline(sockfd, line, MAXLINE) <= 0) return -1;
- if ((len = mystrlen(line, MAXLINE)) <= 0) {
- i--;
- continue;
- }
- if (((*pargv)[i] = (char *)my_malloc(len + 2)) == NULL) return -1;
- line[len] = '\0'; /* overwrite the '\n' */
- strcpy((*pargv)[i], line);
- }
- if (reqbuf != NULL) {
- if ( ((len = mystrlen(reqbuf+ptr, reqlen-ptr)) <= 0) || (len >= MAXLINE) ) return -1;
- if (((*pargv)[i] = (char *)my_malloc(len + 2)) == NULL) return -1;
- strcpy((*pargv)[i], reqbuf+ptr);
- ptr += len + 2;
- }
- #if DEBUG
- printf("clargv[%x]=%s\n", i, (*pargv)[i]);
- #endif /*DEBUG*/
- }
- return ptr;
- }
-
- int
- sendreq(sockfd, clstdin, clstdout, clstderr, clargc, clargv, clpid)
- int sockfd, clstdin, clstdout, clstderr, clargc, clpid;
- char *clargv[];
- {
- #if USE_MSGHDR
- struct iovec iov[1];
- struct msghdr msg;
- int ret;
- int fds[3];
- #endif /*USE_MSGHDR*/
-
- #if USE_MSGHDR
- if ((ret = linearize(-1, glimpse_reqbuf, MAX_ARGS*MAX_NAME_LEN, clargc, clargv, clpid)) < 0) return -1;
-
- fds[2] = clstdin;
- fds[1] = clstdout;
- fds[0] = clstderr;
-
- iov[0].iov_base = (char *) glimpse_reqbuf;
- iov[0].iov_len = ret;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (caddr_t) NULL;
- msg.msg_namelen = 0;
- msg.msg_accrights = (caddr_t) fds;
- msg.msg_accrightslen = 2 * sizeof(int); /* don't send clstdin */
-
- errno = 0;
- if ((ret = sendmsg(sockfd, &msg, 0)) < 0) {
- #if DEBUG
- printf("sendmsg ret = %x, errno = %d\n", ret, errno);
- #endif /*DEBUG*/
- return (-1);
- }
- #if DEBUG
- printf("sendreq %x %x %x, ret = %x, errno = %d\n", fds[0], fds[1], fds[2], ret, errno);
- #endif /*DEBUG*/
- #else /*USE_MSGHDR*/
- if (linearize(sockfd, (char *)NULL, MAX_ARGS*MAX_NAME_LEN, clargc, clargv, clpid) < 0) return -1;
- #endif /*USE_MSGHDR*/
- return (0);
- }
-
- int
- getreq(sockfd, pclstdin, pclstdout, pclstderr, pclargc, pclargv, pclpid)
- int sockfd, *pclstdin, *pclstdout, *pclstderr, *pclargc, *pclpid;
- char **pclargv[];
- {
- #if USE_MSGHDR
- struct iovec iov[1];
- struct msghdr msg;
- int ret;
- int fds[3];
- #endif /*USE_MSGHDR*/
-
- #if USE_MSGHDR
- iov[0].iov_base = (char *) glimpse_reqbuf;
- iov[0].iov_len = MAX_ARGS * MAX_NAME_LEN;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (caddr_t) NULL;
- msg.msg_namelen = 0;
- msg.msg_accrights = (caddr_t)fds;
- msg.msg_accrightslen = 2*sizeof(int);
-
- errno = 0;
- if ((ret = recvmsg(sockfd, &msg, 0)) < 0) {
- #if DEBUG
- printf("bad recvmsg: ret = %x, errno = %d\n", ret, errno);
- #endif /*DEBUG*/
- return -1;
- }
-
- *pclstdin = fds[2];
- *pclstdout = fds[1];
- *pclstderr = fds[0];
-
- if ((ret == delinearize(-1, glimpse_reqbuf, MAX_ARGS * MAX_NAME_LEN, pclargc, pclargv, pclpid)) < 0) return -1;
- #if DEBUG
- printf("getreq %x %x %x, ret = %x, errno = %d\n", fds[0], fds[1], fds[2], ret, errno);
- #endif /*DEBUG*/
- #else /*USE_MSGHDR*/
- if (delinearize(sockfd, (char *)NULL, MAX_ARGS * MAX_NAME_LEN, pclargc, pclargv, pclpid) < 0) return -1;
- *pclstdin = -1;
- *pclstdout = sockfd;
- *pclstderr = sockfd;
- #endif /*USE_MSGHDR*/
- return (0);
- }
-
- #endif /*CLIENTSERVER*/
-
- read_index(indexdir)
- char indexdir[MAXNAME];
- {
- char *home;
- char s[MAXNAME];
- int ret;
-
- if (indexdir[0] == '\0') {
- if ((home = (char *)getenv("HOME")) == NULL) {
- getcwd(indexdir, MAXNAME-1);
- fprintf(stderr, "using working-directory '%s' to locate index\n", indexdir);
- }
- else strncpy(indexdir, home, MAXNAME);
- }
- ret = chdir(indexdir);
- if (getcwd(INDEX_DIR, MAXNAME-1) == NULL) strcpy(INDEX_DIR, indexdir);
- if (ret < 0) {
- fprintf(stderr, "using working-directory '%s' to locate index\n", INDEX_DIR);
- }
-
- sprintf(s, "%s", INDEX_FILE);
- indexfp = fopen(s, "r");
- if(indexfp == NULL) {
- fprintf(stderr, "can't open glimpse index-file %s/%s\n", INDEX_DIR, INDEX_FILE);
- fprintf(stderr, "(use -H to give an index-directory or run 'glimpseindex' to make an index)\n");
- return -1;
- }
- if (stat(s, &index_stat_buf) == -1) {
- fprintf(stderr, "can't stat %s/%s\n", INDEX_DIR, s);
- fclose(indexfp);
- return -1;
- }
-
- sprintf(s, "%s", P_TABLE);
- partfp = fopen(s, "r");
- if(partfp == NULL) {
- fprintf(stderr, "can't open glimpse partition-table %s/%s\n", INDEX_DIR, P_TABLE);
- fprintf(stderr, "(use -H to specify an index-directory or run glimpseindex to make an index)\n");
- return -1;
- }
-
- /* Get options */
- #if BG_DEBUG
- debug = fopen(DEBUG_FILE, "w+");
- if(debug == NULL) {
- fprintf(stderr, "can't open file %s/%s, errno=%d\n", INDEX_DIR, DEBUG_FILE, errno);
- return(-1);
- }
- #endif /*BG_DEBUG*/
- fgets(indexnumberbuf, 256, indexfp);
- if(strstr(indexnumberbuf, "1234567890")) IndexNumber = ON;
- else IndexNumber = OFF;
- fscanf(indexfp, "%%%d\n", &OneFilePerBlock);
- if (OneFilePerBlock < 0) {
- ByteLevelIndex = ON;
- OneFilePerBlock = -OneFilePerBlock;
- }
- fscanf(indexfp, "%%%d\n", &StructuredIndex);
- /* Set WHOLEFILESCOPE for do-it-yourself request processing at client */
- WHOLEFILESCOPE = 1;
- if (StructuredIndex <= 0) {
- WHOLEFILESCOPE = 0;
- StructuredIndex = 0;
- PRINTATTR = 0; /* doesn't make sense: must not go into filter_output */
- }
- else if (-1 == (StructuredIndex = attr_load_names(ATTRIBUTE_FILE))) {
- fprintf(stderr, "error in reading attribute file %s/%s\n", INDEX_DIR, ATTRIBUTE_FILE);
- return(-1);
- }
- #if BG_DEBUG
- fprintf(debug, "buf = %s OneFilePerBlock=%d StructuredIndex=%d\n", indexnumberbuf, OneFilePerBlock, StructuredIndex);
- #endif /*BG_DEBUG*/
- fclose(indexfp);
- indexfp = NULL;
-
- /* Once IndexNumber info is available */
- set_indexable_char(indexable_char);
- set_indexable_char(test_indexable_char);
- set_special_char(indexable_char);
- return 0;
- }
-
/* MUST CARE IF PIPE/SOCKET IS BROKEN! ALSO SIGUSR1 (hardy@cs.colorado.edu) => QUIT CURRENT REQUEST. */
/*
 * Table indexed by signal number; an entry is 1 for signals 3 through 8
 * (all the tracing stuff) and for signal 29 (resource lost), since the
 * default action for those is to dump core.
 */
int ignore_signal[32] = {
	/*  0 ..  7 */	0, 0, 0, 1, 1, 1, 1, 1,
	/*  8 .. 15 */	1, 0, 0, 0, 0, 0, 0, 0,
	/* 16 .. 23 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 24 .. 31 */	0, 0, 0, 0, 0, 1, 0, 0
};
-
- /* S.t. sockets don't persist: they sometimes have a bad habit of doing so */
- void
- cleanup()
- {
- int i, q, k;
-
- /* ^C in the middle of a client call */
- if (svstderr != 2) {
- close(2);
- dup(svstderr);
- }
- fprintf(stderr, "server cleaning up...\n");
- for (i=0; i<64; i++) close(i);
-
- if (ByteLevelIndex) {
- if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {
- free_list(&src_offset_table[k]);
- }
- for (q=0; q<MAXNUM_PAT; q++) {
- if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {
- free_list(&multi_dest_offset_table[q][k]);
- }
- }
- }
-
- exit(3);
- }
-
- #define QUITREQUESTMSG "glimpseserver: aborting request...\n"
- /* S.t. one request doesn't keep server occupied too long, when client already quits */
- void quitrequest(s)
- int s;
- {
- /*
- * Don't write onto stderr, since 2 is duped to sockfd => can cause recursive signal!
- * Also, don't print error message more than once for quitting one request. The
- * server receives signals for EVERY write it attempts when it finds a match: I could
- * not find a way to prevent it, but agrep/bitap.c/fill_buf() was fixed to limit it.
- * -- bg on 16th Feb 1995
- */
- if (!glimpse_clientdied && (s != SIGUSR1)) /* USR1 is a "friendly" cleanup message */
- write(svstderr, QUITREQUESTMSG, strlen(QUITREQUESTMSG));
-
- glimpse_clientdied = 1;
- #ifdef __svr4__
- /* Solaris 2.3 insists that you reset the signal handler */
- (void)signal(s, quitrequest);
- #endif
- }
-
- main(argc, argv)
- int argc;
- char *argv[];
- {
- int ret;
- char indexdir[MAXNAME];
- char **oldargv = argv;
- int oldargc = argc;
- #if CLIENTSERVER
- int sockfd, newsockfd, clilen, len, clpid;
- int clout;
- #if USE_UNIXDOMAIN
- struct sockaddr_un cli_addr, serv_addr;
- #else /*USE_UNIXDOMAIN*/
- struct sockaddr_in cli_addr, serv_addr;
- struct hostent *hp;
- #endif /*USE_UNIXDOMAIN*/
- int cli_len;
- int clargc;
- char **clargv;
- int clstdin, clstdout, clstderr;
- int i;
- char array[4];
- char *p, c;
- #endif /*CLIENTSERVER*/
- int quitwhile;
-
- #if CLIENTSERVER && ISSERVER
- glimpse_isserver = 1; /* I am the server */
- #else /*CLIENTSERVER && ISSERVER*/
- if (argc <= 1) return(usage()); /* Client nees at least 1 argument */
- #endif /*CLIENTSERVER && ISSERVER*/
-
- #define RETURNMAIN(val)\
- {\
- if (indexfp != NULL) fclose(indexfp);\
- if (partfp != NULL) fclose(partfp);\
- if (nullfp != NULL) fclose(nullfp);\
- indexfp = partfp = nullfp = NULL;\
- if (StructuredIndex) {\
- attr_free_table();\
- }\
- return (val);\
- }
-
- /* once-only initialization */
- gethostname(SERV_HOST, MAXNAME - 2);
- SERV_PORT = DEF_SERV_PORT;
- srand(getpid());
- umask(077);
- strcpy(&GProgname[0], argv[0]);
- region_initialize();
- indexfp = partfp = nullfp = NULL;
- if ((nullfp = fopen("/dev/null", "w")) == NULL) {
- fprintf(stderr, "%s: cannot open for writing: /dev/null, errno=%d\n", argv[0], errno);
- RETURNMAIN(-1);
- }
- InterpretSpecial = ON;
- GMAX_WORD_SIZE = MAXPAT;
- src_offset_table = NULL;
- for (i=0; i<MAXNUM_PAT; i++) multi_dest_offset_table[i] = NULL;
-
- #if CLIENTSERVER
- #if !ISSERVER
- /* Check if client has too many arguments: then it is surely running as agrep since I have < half those options! */
- if (argc > MAX_ARGS) goto doityourself;
- #endif /*!ISSERVER*/
-
- while((--argc > 0) && (*++argv)[0] == '-' ) {
- p = argv[0] + 1; /* ptr to first character after '-' */
- c = *(argv[0]+1);
- quitwhile = OFF;
- while (!quitwhile && (*p != '\0')) {
- c = *p;
- switch(c) {
- /* Look for -H option at server (only one that makes sense); if client has a -H, then it goes to doityourself */
- case 'H' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);
- RETURNMAIN(usageS());
- }
- argv ++;
- strcpy(indexdir, argv[0]);
- argc --;
- }
- else {
- strcpy(indexdir, p+1);
- }
- quitwhile = ON;
- break;
-
- /* Recognized by both client and server */
- case 'J' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);
- #if ISSERVER
- RETURNMAIN(usageS());
- #else /*ISSERVER*/
- RETURNMAIN(usage());
- #endif /*ISSERVER*/
- }
- argv ++;
- strcpy(SERV_HOST, argv[0]);
- argc --;
- }
- else {
- strcpy(SERV_HOST, p+1);
- }
- quitwhile = ON;
- break;
-
- /* Recognized by both client and server */
- case 'K' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: the server port must follow the -C option\n", GProgname);
- #if ISSERVER
- RETURNMAIN(usageS());
- #else /*ISSERVER*/
- RETURNMAIN(usage());
- #endif /*ISSERVER*/
- }
- argv ++;
- SERV_PORT = atoi(argv[0]);
- argc --;
- }
- else {
- SERV_PORT = atoi(p+1);
- }
- if ((SERV_PORT < MIN_SERV_PORT) || (SERV_PORT > MAX_SERV_PORT)) {
- fprintf(stderr, "Bad server port %d: must be in [%d, %d]: using default %d\n",
- SERV_PORT, MIN_SERV_PORT, MAX_SERV_PORT, DEF_SERV_PORT);
- SERV_PORT = DEF_SERV_PORT;
- }
- quitwhile = ON;
- break;
-
- #if ISSERVER
- /* server cannot recognize any other option */
- default :
- fprintf(stderr, "%s: server cannot recognize option: '%s'\n", GProgname, p);
- RETURNMAIN(usageS());
- #else /*ISSERVER*/
-
- /* These have 1 argument each, so must do quitwhile */
- case 'd' :
- case 'e' :
- case 'k' :
- case 'D' :
- case 'F' :
- case 'I' :
- case 'L' :
- case 'R' :
- case 'S' :
- case 'T' :
- if (argv[0][2] == '\0') {/* space after - option */
- if(argc <= 1) {
- fprintf(stderr, "%s: the '-%c' option must have an argument\n", GProgname, c);
- RETURNMAIN(usage());
- }
- argv++;
- argc--;
- }
- quitwhile = ON;
- break;
-
- /* These are illegal */
- case 'f' :
- case 'm' :
- case 'p' :
- case 'v' :
- fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
- RETURNMAIN(usage());
- break;
-
- /* They can't be patterns and filenames since they start with a -, these don't have arguments */
- case 'a' :
- case 'b' :
- case 'c' :
- case 'h' :
- case 'i' :
- case 'l' :
- case 'n' :
- case 'o' :
- case 'r' :
- case 's' :
- case 't' :
- case 'w' :
- case 'x' :
- case 'y' :
- case 'z' :
- case 'A' :
- case 'B' :
- case 'G' :
- case 'M' :
- case 'N' :
- case 'O' :
- case 'P' :
- case 'W' :
- case 'Z' :
- break;
-
- case 'C':
- CONTACT_SERVER = 1;
- break;
-
- case 'V' :
- printf("\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- RETURNMAIN(0);
-
- default :
- if (isdigit(c)) quitwhile = ON;
- else {
- fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
- RETURNMAIN(usage());
- }
- break;
- #endif /*ISSERVER*/
- } /* switch(c) */
- p ++;
- }
- }
-
- #if !ISSERVER
- /* Next arg must be the pattern: Check if the user wants to run the client as agrep, or doesn't want to contact the server */
- if ((argc > 1) || (!CONTACT_SERVER)) goto doityourself;
- #endif /*!ISSERVER*/
-
- argv = oldargv;
- argc = oldargc;
- #endif /*CLIENTSERVER*/
-
- #if ISSERVER && CLIENTSERVER
- if (-1 == read_index(indexdir)) RETURNMAIN(ret);
- for (i=0; i<32; i++)
- if (ignore_signal[i]) signal(i, SIG_IGN);
- signal(SIGHUP, cleanup);
- signal(SIGINT, cleanup);
- #if defined(sco)
- if (((void (*)())-1 == signal(SIGPIPE, quitrequest)) ||
- ((void (*)())-1 == signal(SIGUSR1, quitrequest)))
- #else
- if (((void (*)())-1 == signal(SIGPIPE, quitrequest)) ||
- ((void (*)())-1 == signal(SIGUSR1, quitrequest)) ||
- ((void (*)())-1 == signal(SIGURG, quitrequest)))
- #endif
- {
- /* Check for return values here since they ensure reliability */
- fprintf(stderr, "glimpseserver: Unable to install signal-handlers.\n");
- RETURNMAIN(-1);
- }
-
- #if USE_UNIXDOMAIN
- if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
- fprintf(stderr, "server cannot open socket for communication.\n");
- RETURNMAIN(-1);
- }
- unlink("/tmp/.glimpse_server");
- memset((char *)&serv_addr, '\0', sizeof(serv_addr));
- serv_addr.sun_family = AF_UNIX;
- strcpy(serv_addr.sun_path, "/tmp/.glimpse_server"); /* < 108 ! */
- len = strlen(serv_addr.sun_path) + sizeof(serv_addr.sun_family);
- #else /*USE_UNIXDOMAIN*/
- if ((sockfd = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
- perror("glimpseserver: Cannot create socket");
- RETURNMAIN(-1);
- }
- memset((char *)&serv_addr, '\0', sizeof(serv_addr));
- serv_addr.sin_family = AF_INET;
- serv_addr.sin_port = htons(SERV_PORT);
- #if 0
- /* use host-names not internet style d.d.d.d notation */
- serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
- #else
- /*
- * We only want to accept connections from glimpse clients
- * on the SERV_HOST, do not use INADDR_ANY!
- */
- if ((hp = gethostbyname(SERV_HOST)) == NULL) {
- perror("glimpseserver: Cannot resolve host");
- RETURNMAIN(-1);
- }
- memcpy((caddr_t)&serv_addr.sin_addr, hp->h_addr, hp->h_length);
- #endif /*0*/
- len = sizeof(serv_addr);
- #endif /*USE_UNIXDOMAIN*/
-
- if (bind(sockfd, (struct sockaddr *)&serv_addr, len) < 0) {
- perror("glimpseserver: Cannot bind to socket");
- RETURNMAIN(-1);
- }
- listen(sockfd, SERVER_QUEUE_SIZE);
-
- printf("glimpseserver: On-line (pid = %d, port = %d) waiting for request...\n", getpid(), SERV_PORT);
- fflush(stdout); /* must fflush to print on server stdout */
- while (1) {
- /*
- * Spin until sockfd is ready to do a non-blocking accept(2).
- * We only wait for 15 seconds, because SunOS may
- * swap us out if we block for 20 seconds or more.
- * -- Courtesy: Darren Hardy, hardy@cs.colorado.edu
- */
- if (do_select(sockfd, 15) != 1)
- continue;
- /* get parameters */
- ret = 0;
- clargc = 0;
- clargv = NULL;
- cli_len = sizeof(cli_addr);
- if ((newsockfd = accept(sockfd, &cli_addr, &cli_len)) < 0) continue;
- if (getreq(newsockfd, &clstdin, &clstdout, &clstderr, &clargc, &clargv, &clpid) < 0) {
- ret = -1;
- #if DEBUG
- printf("getreq errno: %d\n", errno);
- #endif /*DEBUG*/
- goto end_process;
- }
-
- #if DEBUG
- printf("server processing request on %x\n", newsockfd);
- #endif /*DEBUG*/
- /*
- * Server doesn't wait for response, no point using
- svstdin = dup(0);
- close(0);
- dup(clstdin);
- close(clstdin);
- */
- /*
- * This is wrong since clstderr == clstdout!
- svstdout = dup(1);
- close(1);
- dup(clstdout);
- close(clstdout);
- svstderr = dup(2);
- close(2);
- dup(clstderr);
- close(clstderr);
- */
- svstdout = dup(1);
- svstderr = dup(2);
- close(1);
- close(2);
- dup(clstdout);
- dup(clstderr);
- close(clstdout);
- close(clstderr);
-
- /*
- * IMPORTANT: Unbuffered I/O to the client!
- * Done for Harvest since partial results might be
- * needed and fflush will not flush partial results
- * to the client if we type ^C and kill it: it puts
- * them into /dev/null. This way, output is unbuffered
- * and the client sees at least some results if killed.
- */
- setbuf(stdout, NULL);
- setbuf(stderr, NULL);
-
- glimpse_call = 0;
- glimpse_clientdied = 0;
- ret = process_query(clargc, clargv);
- /*
- * Server doesn't wait for response, no point using
- close(0);
- dup(svstdin);
- close(svstdin);
- svstdin = 0;
- */
- if (glimpse_clientdied) {
- /*
- * This code is *ONLY* used as a safety net now.
- * The old problem was that users would see portions
- * of previous (and usually) unrelated queries!
- * glimpseserver now uses unbuffered I/O to the
- * client so all previous fwrite's to now are
- * gone. But since this is such a nasty problem
- * we flush stdout to /dev/null just in case.
- */
- clout = open("/dev/null", O_WRONLY);
- close(1);
- dup(clout);
- close(clout);
- fflush(stdout);
- }
-
- /* Restore svstdout and svstderr to stdout/stderr */
- close(1);
- dup(svstdout);
- close(svstdout);
- svstdout = 1;
- close(2);
- dup(svstderr);
- close(svstderr);
- svstderr = 2;
-
- end_process:
- #if USE_MSGHDR
- /* send reply and cleanup */
- array[0] = (ret & 0xff000000) >> 24;
- array[1] = (ret & 0xff0000) >> 16;
- array[2] = (ret & 0xff00) >> 8;
- array[3] = (ret & 0xff);
- writen(newsockfd, array, 4);
- #endif /*USE_MSGHDR*/
- #if DEBUG
- write(1, "done\n", 5);
- #endif /*DEBUG*/
- for (i=0; i<clargc; i++)
- if (clargv[i] != NULL) my_free(clargv[i], 0);
- if (clargv != NULL) my_free(clargv, 0);
- close(newsockfd); /* if !USE_MSGHDR, client directly reads from socket and writes onto stdout until EOF */
- }
- #else /*ISSERVER && CLIENTSERVER*/
-
- #if CLIENTSERVER
- #if USE_UNIXDOMAIN
- if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- goto doityourself;
- }
- memset((char *)&serv_addr, '\0', sizeof(serv_addr));
- serv_addr.sun_family = AF_UNIX;
- strcpy(serv_addr.sun_path, "/tmp/.glimpse_server"); /* < 108 ! */
- len = strlen(serv_addr.sun_path) + sizeof(serv_addr.sun_family);
- #else /*USE_UNIXDOMAIN*/
- if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- goto doityourself;
- }
- serv_addr.sin_family = AF_INET;
- serv_addr.sin_port = htons(SERV_PORT);
- #if 0
- /* use host-names not internet style d.d.d.d notation */
- serv_addr.sin__addr.s_addr = inet_addr(SERV_HOST);
- #else /*0*/
- if ((hp = gethostbyname(SERV_HOST)) == NULL) {
- fprintf(stderr, "gethostbyname (%s) failed\n", SERV_HOST);
- goto doityourself;
- }
- memcpy((caddr_t)&serv_addr.sin_addr, hp->h_addr, hp->h_length);
- #endif /*0*/
- len = sizeof(serv_addr);
- #endif /*USE_UNIXDOMAIN*/
-
- if (connect(sockfd, (struct sockaddr *)&serv_addr, len) < 0) {
- char errbuf[4096];
- sprintf(errbuf, "glimpse: Cannot contact glimpseserver: %s, port %d:", SERV_HOST, SERV_PORT);
- perror(errbuf);
- /* perror(SERV_HOST); */
- #if DEBUG
- printf("connect errno: %d\n", errno);
- #endif /*DEBUG*/
- close(sockfd);
- goto doityourself;
- }
-
- if (sendreq(sockfd, fileno(stdin), fileno(stdout), fileno(stderr), argc, argv, getpid()) < 0) {
- perror("sendreq");
- #if DEBUG
- printf("sendreq errno: %d\n", errno);
- #endif /*DEBUG*/
- close(sockfd);
- goto doityourself;
- }
-
- #if USE_MSGHDR
- if (readn(sockfd, array, 4) != 4) {
- close(sockfd);
- goto doityourself;
- }
- ret = (array[0] << 24) + (array[1] << 16) + (array[2] << 8) + array[3];
- #else /*USE_MSGHDR*/
- {
- /*
- * Dump everything the server writes into the socket onto
- * stdout until EOF/error. Do this in a way so that *everything*
- * the server sends is dumped to stdout by the client. The
- * client might die suddenly via ^C or SIGTERM, but we still
- * want the results.
- */
- char tmpbuf[1024];
- int n;
-
- while ((n = read(sockfd, tmpbuf, 1024)) > 0) {
- write(fileno(stdout), tmpbuf, n);
- }
- }
- #endif /*USE_MSGHDR*/
-
- close(sockfd);
- RETURNMAIN(ret);
-
- doityourself:
- #if DEBUG
- printf("doing it myself :-(\n");
- #endif /*DEBUG*/
- #endif /*CLIENTSERVER*/
- setbuf(stdout, NULL); /* Unbuffered I/O to always get every result */
- setbuf(stderr, NULL);
- glimpse_call = 0;
- glimpse_clientdied = 0;
- ret = process_query(oldargc, oldargv);
- RETURNMAIN(ret);
- #endif /*ISSERVER && CLIENTSERVER*/
- }
-
- process_query(argc, argv)
- int argc;
- char *argv[];
- {
- int searchpercent;
- int num_blocks;
- int i, j;
- int iii; /* Udi */
- int jjj;
- char c;
- char *p;
- int ret;
- int jj;
- int quitwhile;
- char indexdir[MAX_LINE_LEN];
- int oldargc = argc;
- char **oldargv = argv;
- CHAR dummypat[MAX_PAT];
- int dummylen=0;
- int my_M_index, my_P_index, my_b_index, my_A_index, my_l_index = -1, my_B_index = -1;
- char **outname;
- int gnum_of_matched = 0;
- int foundpat = 0;
- int wholefilescope=0;
-
- /*
- * RETURN(val): release the per-query state used by process_query() and
- * return val from the enclosing function.
- *
- * Macro to destroy EVERYTHING before return since we might want to make this a
- * library function later on: convention is that after destroy, objects are made
- * NULL throughout the source code, and are all set to NULL at initialization time.
- * DO agrep_argv, index_argv and FileOpt my_malloc/my_free optimizations later.
- * my_free calls have 2nd parameter = 0 if the size is not easily determinable.
- *
- * What it does, in order:
- *   - clears first_search;
- *   - frees and NULLs every agrep_argv[] / index_argv[] slot, FileOpt and pat_list[];
- *   - unlinks the two temp files named after this pid under TEMP_DIR and frees outname;
- *   - if ByteLevelIndex, frees the offset lists hanging off src_offset_table and
- *     multi_dest_offset_table (the tables themselves are kept);
- *   - frees the value of each of the first num_terminals entries of terminals[];
- *   - destroys GParse if ComplexBoolean is set;
- *   - frees and NULLs the first GNumfiles entries of GTextfiles[].
- *
- * The macro dereferences outname[0], so it may only be used after outname has
- * been allocated.  val is evaluated after all of the cleanup has run.
- *
- * num_terminals and GNumfiles are NOT reset by this macro, so a later invocation
- * can walk over slots that an earlier invocation already released.  To keep that
- * harmless, freed terminal values are set to NULL (free(NULL) is a no-op) and
- * GTextfiles[] entries are only passed to my_free when non-NULL, like every
- * other my_free call here.
- */
- #define RETURN(val) \
- {\
-     int q,k;\
- \
-     first_search = 0;\
-     for (k=0; k<MAX_ARGS; k++) {\
-         if (agrep_argv[k] != NULL) my_free(agrep_argv[k], 0);\
-         if (index_argv[k] != NULL) my_free(index_argv[k], 0);\
-         agrep_argv[k] = index_argv[k] = NULL;\
-     }\
-     if (FileOpt != NULL) my_free(FileOpt, MAXFILEOPT);\
-     FileOpt = NULL;\
-     for (k=0; k<MAXNUM_PAT; k++) {\
-         if (pat_list[k] != NULL) my_free(pat_list[k], 0);\
-         pat_list[k] = NULL;\
-     }\
-     /* Remove the pid-named temp files, then release the outname buffers */\
-     sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());\
-     unlink(tempfile);\
-     sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());\
-     unlink(outname[0]);\
-     my_free(outname[0], 0);\
-     my_free(outname, 0);\
- \
-     if (ByteLevelIndex) {\
-         if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {\
-             free_list(&src_offset_table[k]);\
-         }\
-         /* Don't make src_offset_table itself NULL: it will be bzero-d below if !NULL */\
-         for (q=0; q<MAXNUM_PAT; q++) {\
-             if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {\
-                 free_list(&multi_dest_offset_table[q][k]);\
-             }\
-             /* Don't make multi_dest_offset_table[q] itself NULL: it will be bzero-d below if !NULL */\
-         }\
-     }\
-     for (k=0; k<num_terminals; k++) {\
-         /* NULL the slot after freeing so a repeated RETURN cannot free it twice */\
-         free(terminals[k].data.leaf.value);\
-         terminals[k].data.leaf.value = NULL;\
-     }\
-     if (ComplexBoolean) destroy_tree(&GParse);\
-     for (k=0; k<GNumfiles; k++) {\
-         /* Entry may already be NULL if GNumfiles is left over from an earlier query */\
-         if (GTextfiles[k] != NULL) my_free(GTextfiles[k], 0);\
-         GTextfiles[k] = NULL;\
-     }\
-     /* Don't free the GTextfiles buffer itself since it is allocated once in get_filename.c */\
-     return (val);\
- }
-
- /*
- * Initialize
- */
- strcpy(&GProgname[0], argv[0]);
- if (argc <= 1) return(usage());
- strcpy(TEMP_DIR, "/tmp");
- D_length = 0;
- D = 0;
- pattern_index = 0;
- first_search = 1;
- outname = (char **)my_malloc(sizeof(char *));
- outname[0] = (char *)my_malloc(MAX_LINE_LEN);
- NOBYTELEVEL = 0;
- OPTIMIZEBYTELEVEL = 0;
- GLIMITOUTPUT = 0;
- GBESTMATCH = 0;
- GRECURSIVE = 0;
- GNOPROMPT = 0;
- GOUTTAIL = 2; /* stupid fix, but works */
- GFILENAMEONLY = 0;
- GNOFILENAME = 0;
- MATCHFILE = 0;
- PRINTATTR = 0;
- Pat_as_is=0;
- Only_first = 0;
- foundattr = 0;
- ComplexBoolean = 0;
- bestmatcherrors = 0;
- patbufpos = -1;
- RegionLimit=DEFAULT_REGION_LIMIT;
- strcpy(GD_pattern, "\n");
- GD_length = strlen(GD_pattern);
- indexdir[0] = '\0';
- memset(index_argv, '\0', sizeof(char *) * MAX_ARGS);
- index_argc = 0;
- memset(agrep_argv, '\0', sizeof(char *) * MAX_ARGS);
- agrep_argc = 0;
- FileOpt = NULL;
- fileopt_length = 0;
- memset(pat_list, '\0', sizeof(char *) * MAXNUM_PAT);
- memset(pat_attr, '\0', sizeof(int) * MAXNUM_PAT);
- for (i=0; i<MAX_ARGS; i++)
- index_argv[i] = (char *)my_malloc(MaxNameLength + 2);
- memset(is_mgrep_pat, '\0', sizeof(int) * MAXNUM_PAT);
- memset(mgrep_pat_index, '\0', sizeof(int) *MAXNUM_PAT);
- num_mgrep_pat = 0;
- memset(pat_buf, '\0', (MAXNUM_PAT + 2)*MAXPAT);
- pat_ptr = 0;
- sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());
- /* Set WHOLEFILESCOPE for per-request processing at server */
- if (StructuredIndex) WHOLEFILESCOPE = 1;
- else WHOLEFILESCOPE = 0;
-
- if (argc > MAX_ARGS) {
- #if ISSERVER
- fprintf(stderr, "too many arguments %d obtained on server!\n", argc);
- #endif /*ISSERVER*/
- i = fileagrep(oldargc, oldargv, 0, stdout);
- RETURN(i);
- }
-
- /*
- * Process what options you can, then call fileagrep_init() to set
- * options in agrep and get the pattern. Then, call fileagrep_search().
- * Begin by copying options into agrep_argv assuming glimpse was not
- * called as agrep (optimistic :-).
- */
-
- agrep_argc = 0;
- for (i=0; i<MAX_ARGS; i++) agrep_argv[i] = NULL;
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
- strcpy(agrep_argv[agrep_argc], argv[0]); /* copy the name of the program anyway */
- agrep_argc ++;
-
- /* In glimpse, you should never output filenames with zero matches */
- if (agrep_argc + 1 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'z';
- agrep_argv[agrep_argc][2] = '\0';
- agrep_argc ++;
-
- /* In glimpse, you should always print pattern when using mgrep (user can't do -f or -m)! */
- if (agrep_argc + 1 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'P';
- agrep_argv[agrep_argc][2] = '\0';
- my_P_index = agrep_argc;
- agrep_argc ++;
-
- /* In glimpse, you should always output multiple when doing mgrep */
- if (agrep_argc + 1 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'M';
- agrep_argv[agrep_argc][2] = '\0';
- my_M_index = agrep_argc;
- agrep_argc ++;
-
- /* In glimpse, you should print the byte offset if there is a structured query */
- if (agrep_argc + 1 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'b';
- agrep_argv[agrep_argc][2] = '\0';
- my_b_index = agrep_argc;
- agrep_argc ++;
-
- /* In glimpse, you should always have space for doing -m if required */
- if (agrep_argc + 2 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'm';
- agrep_argv[agrep_argc][2] = '\0';
- agrep_argc ++;
- agrep_argv[agrep_argc] = (char *)my_malloc(2); /* no op */
- agrep_argv[agrep_argc][0] = '\0';
- agrep_argc ++;
-
- /* Add -A option to print filenames as default */
- if (agrep_argc + 1 >= MAX_ARGS) {
- fprintf(stderr, "%s: too many options!\n", GProgname);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = 'A';
- agrep_argv[agrep_argc][2] = '\0';
- my_A_index = agrep_argc;
- agrep_argc ++;
-
- while((agrep_argc < MAX_ARGS) && (--argc > 0) && (*++argv)[0] == '-' ) {
- p = argv[0] + 1; /* ptr to first character after '-' */
- c = *(argv[0]+1);
- quitwhile = OFF;
- while (!quitwhile && (*p != '\0')) {
- c = *p;
- switch(c) {
- case 'F' :
- MATCHFILE = ON;
- FileOpt = (CHAR *)my_malloc(MAXFILEOPT);
- if (*(p + 1) == '\0') {/* space after - option */
- if(argc <= 1) {
- fprintf(stderr, "%s: a file pattern must follow the -F option\n", GProgname);
- RETURN(usage());
- }
- argv++;
- if ((dummylen = strlen(argv[0])) > MAXFILEOPT) {
- fprintf(stderr, "%s: -F option list too long\n", GProgname);
- RETURN(usage());
- }
- strcpy(FileOpt, argv[0]);
- argc--;
- } else {
- if ((dummylen = strlen(p+1)) > MAXFILEOPT) {
- fprintf(stderr, "%s: -F option list too long\n", GProgname);
- RETURN(usage());
- }
- strcpy(FileOpt, p+1);
- } /* else */
- quitwhile = ON;
- break;
-
- /*
- * indexed search - use the exact pattern to search the index as well:
- * not implemented yet
- case 'X' :
- Pat_as_is = ON;
- break;
- */
-
- /* search the index only and output the number of blocks */
- case 'N' :
- Only_first = ON;
- break ;
-
- /* go to home directory to find the index: even if server overwrites indexdir here, it won't overwrite INDEX_DIR until read_index() */
- case 'H' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);
- RETURN(usage());
- }
- argv ++;
- #if !ISSERVER
- strcpy(indexdir, argv[0]);
- #endif /*!ISSERVER*/
- argc --;
- }
- #if !ISSERVER
- else {
- strcpy(indexdir, p+1);
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(4);
- strcpy(agrep_argv[agrep_argc], "-H");
- agrep_argc ++;
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(indexdir) + 2);
- strcpy(agrep_argv[agrep_argc], indexdir);
- agrep_argc ++;
- #endif /*!ISSERVER*/
- quitwhile = ON;
- break;
-
- /* go to temp directory to create temp files */
- case 'T' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: a directory name must follow the -T option\n", GProgname);
- RETURN(usage());
- }
- argv ++;
- strcpy(TEMP_DIR, argv[0]);
- argc --;
- }
- else {
- strcpy(TEMP_DIR, p+1);
- }
- sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());
- quitwhile = ON;
- break;
-
- case 'R' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: the record size must follow the -R option\n", GProgname);
- RETURN(usage());
- }
- argv ++;
- RegionLimit = atoi(argv[0]);
- argc --;
- }
- else {
- RegionLimit = atoi(p+1);
- }
- if ((RegionLimit <= 0) || (RegionLimit > MAX_REGION_LIMIT)) {
- fprintf(stderr, "Bad record size %d: must be in [%d, %d]: using default %d\n",
- RegionLimit, 1, MAX_REGION_LIMIT, DEFAULT_REGION_LIMIT);
- RegionLimit = DEFAULT_REGION_LIMIT;
- }
- quitwhile = ON;
- break;
-
- /* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */
- case 'J' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);
- RETURNMAIN(usageS());
- }
- argv ++;
- #if !ISSERVER
- strcpy(SERV_HOST, argv[0]);
- #endif /*!ISSERVER*/
- argc --;
- }
- #if !ISSERVER
- else {
- strcpy(SERV_HOST, p+1);
- }
- #endif /*!ISSERVER*/
- quitwhile = ON;
- break;
-
- /* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */
- case 'K' :
- if (*(p + 1) == '\0') {/* space after - option */
- if (argc <= 1) {
- fprintf(stderr, "%s: the server port must follow the -C option\n", GProgname);
- RETURN(usage());
- }
- argv ++;
- #if !ISSERVER
- SERV_PORT = atoi(argv[0]);
- #endif /*!ISSERVER*/
- argc --;
- }
- #if !ISSERVER
- else {
- SERV_PORT = atoi(p+1);
- }
- if ((SERV_PORT < MIN_SERV_PORT) || (SERV_PORT > MAX_SERV_PORT)) {
- fprintf(stderr, "Bad server port %d: must be in [%d, %d]: using default %d\n",
- SERV_PORT, MIN_SERV_PORT, MAX_SERV_PORT, DEF_SERV_PORT);
- SERV_PORT = DEF_SERV_PORT;
- }
- #endif /*!ISSERVER*/
- quitwhile = ON;
- break;
-
- case 'C' :
- CONTACT_SERVER = 1;
- break;
-
- case 'a' :
- PRINTATTR = 1;
- break;
-
- case 'W':
- wholefilescope = 1;
- break;
-
- case 'z' :
- UseFilters = 1;
- break;
-
- case 'r' :
- GRECURSIVE = 1;
- break;
-
- case 'V' :
- printf("\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- RETURN(0);
-
- /* Must let 'f' and 'm' fall thru to default once multipatterns are done in agrep */
- case 'f' :
- case 'p' :
- case 'm' :
- case 'v' :
- fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
- RETURN(usage());
- break;
-
- case 'I' :
- case 'D' :
- case 'S' :
- /* There is no space after these options */
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
- agrep_argv[agrep_argc][0] = '-';
- strcpy(agrep_argv[agrep_argc] + 1, p);
- agrep_argc ++;
- quitwhile = ON;
- break;
-
- case 'l':
- GFILENAMEONLY = 1;
- my_l_index = agrep_argc;
- agrep_argv[agrep_argc] = (char *)my_malloc(4);
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = c;
- agrep_argv[agrep_argc][2] = '\0';
- agrep_argc ++;
- break;
-
- /*
- * Copy the set of options for agrep: put them in separate argvs
- * even if they are together after one '-' (easier to process).
- * These are agrep options which glimpse has to peek into.
- */
- default:
- agrep_argv[agrep_argc] = (char *)my_malloc(16);
- agrep_argv[agrep_argc][0] = '-';
- agrep_argv[agrep_argc][1] = c;
- agrep_argv[agrep_argc][2] = '\0';
- agrep_argc ++;
-
- if (c == 'n') {
- if (ByteLevelIndex) {
- NOBYTELEVEL = 1;
- fprintf(stderr, "Warning: -n is used with byte-level index: must SEARCH the files\n");
- }
- }
- if (c == 't') GOUTTAIL = 1;
- if (c == 'y') GNOPROMPT = 1;
- else if (c == 'h') GNOFILENAME = 1;
- else if (c == 'B') {
- GBESTMATCH = 1;
- my_B_index = agrep_argc - 1;
- }
- /* the following options are followed by a parameter */
- else if ((c == 'e') || (c == 'd') || (c == 'L') || (c == 'k')) {
- if (*(p + 1) == '\0') {/* space after - option */
- if(argc <= 1) {
- fprintf(stderr, "%s: the '-%c' option must have an argument\n", GProgname, c);
- RETURN(usage());
- }
- argv++;
- if ( (c == 'd') && ((D_length = strlen(argv[0])) > MAX_NAME_SIZE) ) {
- fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);
- RETURN(usage());
- /* Should this be RegionLimit if ByteLevelIndex? */
- }
- else if ((c == 'L') && ((GLIMITOUTPUT = atoi(argv[0])) < 0) ) {
- fprintf(stderr, "%s: invalid output limit %s\n", GProgname, argv[0]);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
- strcpy(agrep_argv[agrep_argc], argv[0]);
- if (c == 'd') {
- preprocess_delimiter(argv[0], D_length, GD_pattern, &GD_length);
- if (GOUTTAIL == 2) GOUTTAIL = 0;
- /* Should this be RegionLimit if ByteLevelIndex? */
- }
- argc--;
- } else {
- if ( (c == 'd') && ((D_length = strlen(p+1)) > MAX_NAME_SIZE) ) {
- fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);
- RETURN(usage());
- /* Should this be RegionLimit if ByteLevelIndex? */
- }
- else if ((c == 'L') && ((GLIMITOUTPUT = atoi(p+1)) < 0) ) {
- fprintf(stderr, "%s: invalid output limit %s\n", GProgname, p+1);
- RETURN(usage());
- }
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(p+1) + 2);
- strcpy(agrep_argv[agrep_argc], p+1);
- if (c == 'd') {
- preprocess_delimiter(p+1, D_length-2, GD_pattern, &GD_length);
- if (GOUTTAIL == 2) GOUTTAIL = 0;
- /* Should this be RegionLimit if ByteLevelIndex? */
- }
- }
- agrep_argc ++;
- #if DEBUG
- fprintf(stderr, "%d = %s\n", agrep_argc, agrep_argv[agrep_argc - 1]);
- #endif /*DEBUG*/
- quitwhile = ON;
- if ((c == 'e') || (c == 'k')) foundpat = 1;
- }
- /* else it is something that glimpse doesn't know and agrep needs to look at */
-
- break; /* from default: */
-
- } /* switch(c) */
- p ++;
- }
- } /* while (--argc > 0 && (*++argv)[0] == '-') */
-
- /* exitloop: */
-
- if ((GBESTMATCH == ON) && (MATCHFILE == ON) && (Only_first == ON))
- fprintf(stderr, "Warning: the number of matches may be incorrect when -B is used with -F.\n");
-
- if (GOUTTAIL) GOUTTAIL = 1;
-
- if (GNOFILENAME) {
- agrep_argv[my_A_index][1] = 'Z'; /* ignore the -A option */
- }
-
- if (argc > 0) {
- /* copy the rest of the options the pattern and the filenames if any verbatim */
- for (i=0; i<argc; i++) {
- if (agrep_argc >= MAX_ARGS) break;
- agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
- strcpy(agrep_argv[agrep_argc], argv[0]);
- agrep_argc ++;
- argv ++;
- }
- if (!foundpat) argc --;
- }
-
- #if 0
- for (j=0; j<agrep_argc; j++) printf("agrep_argv[%d] = %s\n", j, agrep_argv[j]);
- printf("argc = %d\n", argc);
- #endif /*0*/
-
- /*
- * Now perform the search by first looking at the index
- * and obtaining the files to search; and then search
- * them and output the result. If argc > 0, glimpse
- * runs as agrep: otherwise, it searches index, etc.
- */
-
- if (argc <= 0) {
- glimpse_call = 1;
- /* Initialize some data structures, read the index */
- if (GRECURSIVE == 1) {
- fprintf(stderr, "illegal option: '-r'\n");
- RETURN(usage());
- }
- num_terminals = 0;
- GParse = NULL;
- memset(terminals, '\0', sizeof(ParseTree) * MAXNUM_PAT);
- #if !ISSERVER
- if (-1 == read_index(indexdir)) RETURN(-1);
- #endif /*!ISSERVER*/
- WHOLEFILESCOPE = (WHOLEFILESCOPE || wholefilescope);
-
- if (ByteLevelIndex) {
- /* Must zero them here in addition to index search so that RETURN macro runs correctly */
- if ((src_offset_table == NULL) &&
- ((src_offset_table = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL)) exit(2);
- memset(src_offset_table, '\0', sizeof(struct offsets *) * OneFilePerBlock);
- for (i=0; i<MAXNUM_PAT; i++) {
- if ((multi_dest_offset_table[i] == NULL) &&
- ((multi_dest_offset_table[i] = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL)) exit(2);
- memset(multi_dest_offset_table[i], '\0', sizeof(struct offsets *) * OneFilePerBlock);
- }
- }
- read_filters(INDEX_DIR, UseFilters);
-
- if (glimpse_clientdied) RETURN(0);
- /* Now initialize agrep, set the options and get the actual pattern into GPattern */
- if ((GM = fileagrep_init(agrep_argc, agrep_argv, MAXPAT, GPattern)) <= 0) {
- /* this printf need not be there: agrep prints messages if error */
- RETURN(usage());
- }
- patindex = pattern_index;
- for (j=0; j<GM; j++) {
- if (GPattern[j] == '\\') j++;
- else if (test_indexable_char[GPattern[j]]) break;
- }
- if (j >= GM) {
- fprintf(stderr, "%s: pattern '%s' has no indexable characters: glimpse cannot search for it\n", GProgname, GPattern);
- RETURN(-1);
- }
-
- /* Split GPattern into individual boolean terms */
- if (split_pattern(GPattern, GM, APattern, terminals, &num_terminals, &GParse, StructuredIndex) <= 0) RETURN(-1);
- #if BG_DEBUG
- fprintf(debug, "GPattern = %s, APattern = %s, num_terminals = %d\n", GPattern, APattern, num_terminals);
- #endif /*BG_DEBUG*/
- if (foundattr) WHOLEFILESCOPE = 1; /* makes no sense to search attribute=value expressions without WHOLEFILESCOPE */
- else if (!ComplexBoolean && !((long)GParse & AND_EXP)) WHOLEFILESCOPE = 0; /* ORs can be done without WHOLEFILESCOPE */
- if (WHOLEFILESCOPE <= 0) agrep_argv[my_b_index][1] = 'Z';
- if (!ComplexBoolean && ((long)GParse & AND_EXP) && (my_l_index != -1)) agrep_argv[my_l_index][1] = 'Z';
-
- /* Now re-initialize agrep_argv with APattern instead of GPattern */
- my_free(agrep_argv[patindex], 0);
- AM=strlen(APattern);
- agrep_argv[patindex] = (char *)my_malloc(AM + 2);
- strcpy(agrep_argv[patindex], APattern);
-
- /*
- * Copy the agrep-options that are relevant to index search into
- * index_argv (see man-pages for which options are relevant).
- * Also, adjust patindex whenever options are skipped over.
- * NOTE: agrep_argv does NOT contain two options after one '-'.
- */
- index_argc = 0;
- for (j=0; j<agrep_argc; j++) {
- if (agrep_argv[j][0] == '-') {
- if ((agrep_argv[j][1] == 'c') || (agrep_argv[j][1] == 'h') || (agrep_argv[j][1] == 'l') || (agrep_argv[j][1] == 'n') ||
- (agrep_argv[j][1] == 's') || (agrep_argv[j][1] == 't') || (agrep_argv[j][1] == 'G') || (agrep_argv[j][1] == 'O') ||
- (agrep_argv[j][1] == 'b') || (agrep_argv[j][1] == 'i')) {
- patindex --;
- continue;
- }
- if ((agrep_argv[j][1] == 'd') || (agrep_argv[j][1] == 'L')) { /* skip over the argument too */
- j++;
- patindex -= 2;
- continue;
- }
- if ((agrep_argv[j][1] == 'e') || (agrep_argv[j][1] == 'm')) {
- strcpy(index_argv[index_argc], agrep_argv[j]);
- index_argc ++; j++;
- strcpy(index_argv[index_argc], agrep_argv[j]);
- if (agrep_argv[j-1][1] == 'm') patbufpos = index_argc; /* where to put the patbuf if fast-boolean by mgrep() */
- index_argc ++;
- }
- else { /* No arguments: just copy THAT option: maybe, change some options */
- strcpy(index_argv[index_argc], agrep_argv[j]);
- if (agrep_argv[j][1] == 'A') index_argv[index_argc][1] = 'h';
- else if (agrep_argv[j][1] == 'x') index_argv[index_argc][1] = 'w';
- index_argc++;
- }
- }
- else { /* This is either the pattern itself or a filename */
- strcpy(index_argv[index_argc], agrep_argv[j]);
- index_argc++;
- }
- }
- sprintf(index_argv[index_argc], "%s", INDEX_FILE);
- index_argc ++;
- #if 0
- for (j=0; j<index_argc; j++) printf("index_argv[%d] = %s\n", j, index_argv[j]);
- printf("patindex = %d\n", patindex);
- #endif /*0*/
-
- /* Search the index and process index-search-only options; Worry about file-pattern */
- ret = search_index(GParse);
- if (ret <= 0) RETURN(-1);
- num_blocks=0;
- if (OneFilePerBlock) {
- for(iii=0; iii<round(OneFilePerBlock, 8*sizeof(int)); iii++) {
- if (src_index_set[iii] == 0) continue;
- for (jjj=0; jjj < 8*sizeof(int); jjj++)
- if (src_index_set[iii] & mask_int[jjj])
- num_blocks ++;
- }
- if (num_blocks > OneFilePerBlock) num_blocks = OneFilePerBlock; /* roundoff */
- }
- else {
- GNumpartitions = get_table(P_TABLE, p_table, MAX_PARTITION, 0); /* Get partitions before calling get_filenames() */
- for (iii=0; iii<MAX_PARTITION; iii++)
- if (src_index_set[iii]) num_blocks++;
- }
- if (num_blocks <= 0) RETURN (0);
- if ((src_index_set[REAL_PARTITION - 1] == 1) && !Only_first && !OPTIMIZEBYTELEVEL) {
- fprintf(stderr, "Warning: pattern has words present in the stop-list: must SEARCH the files\n");
- }
- /* if just the NOBYTELEVEL flag is set, then it is an optimization which glimpse does and user need not be warned */
- #if DEBUG
- fprintf(stderr, "--> search=%d optimize=%d times=%d all=%d blocks=%d len=%d pat=%s scope=%d\n",
- NOBYTELEVEL, OPTIMIZEBYTELEVEL, src_index_set[REAL_PARTITION - 2], src_index_set[REAL_PARTITION - 1], num_blocks, strlen(APattern), APattern, WHOLEFILESCOPE);
- #endif /*DEBUG*/
- dummypat[0] = '\0';
- if (!MATCHFILE) { /* the argc,argv don't matter */
- get_filenames(src_index_set, 0, NULL, dummylen, dummypat, OneFilePerBlock);
-
- if (Only_first) { /* search the index only */
- fprintf(stderr, "There are matches to %d out of %d %s\n", num_blocks, (OneFilePerBlock > 0) ? OneFilePerBlock : GNumpartitions, (OneFilePerBlock > 0) ? "files" : "blocks");
- if (OneFilePerBlock && (num_blocks > 0)) {
- char cc[8];
- cc[0] = 'y';
- #if !ISSERVER
- if (!GNOPROMPT) {
- fprintf(stderr, "Do you want to see the file names? (y/n)");
- fgets(cc, 4, stdin);
- }
- #endif /*!ISSERVER*/
- if (cc[0] == 'y') {
- for (jjj=0; jjj<GNumfiles; jjj++)
- printf("%s\n", GTextfiles[jjj]);
- }
- }
- RETURN(0);
- }
- if (!OneFilePerBlock) searchpercent = num_blocks*100/GNumpartitions;
- else searchpercent = num_blocks * 100 / OneFilePerBlock;
- #if BG_DEBUG
- fprintf(debug, "searchpercent = %d, num_blocks = %d\n", searchpercent, num_blocks);
- #endif /*BG_DEBUG*/
- #if !ISSERVER
- if (!GNOPROMPT && (searchpercent > MAX_SEARCH_PERCENT)) {
- char cc[8];
- cc[0] = 'y';
- fprintf(stderr, "Your query may search about %d%% of the total space! Continue? (y/n)", searchpercent);
- fgets(cc, 4, stdin);
- if (cc[0] != 'y') RETURN(0);
- if (ByteLevelIndex && (searchpercent > DEF_MAX_INDEX_PERCENT)) NOBYTELEVEL = 1;
- }
- #endif /*!ISSERVER*/
- }
- else { /* set up the right options for -F in index_argv/index_argc itself since they will no longer be used */
- index_argc=0;
- strcpy(index_argv[0], GProgname);
-
- /* adding the -h option, which is safer for -F */
- index_argc ++;
- index_argv[index_argc][0] = '-';
- index_argv[index_argc][1] = 'h';
- index_argv[index_argc][2] = '\0';
- index_argc ++;
-
- /* new code: bgopal, Feb/8/94: deleted udi's code here */
- j = 0;
- while (FileOpt[j] == '-') {
- j++;
- while ((FileOpt[j] != ' ') && (FileOpt[j] != '\0') && (FileOpt[j] != '\n')) {
- if (j >= MAX_ARGS - 1) {
- fprintf(stderr, "%s: too many options after -F: %s\n", GProgname, FileOpt);
- RETURN(usage());
- }
- index_argv[index_argc][0] = '-';
- index_argv[index_argc][1] = FileOpt[j];
- index_argv[index_argc][2] = '\0';
- index_argc ++;
- j++;
- }
- if ((FileOpt[j] == '\0') || (FileOpt[j] == '\n')) break;
- if ((FileOpt[j] == ' ') && (FileOpt[j-1] == '-')) {
- fprintf(stderr, "%s: illegal option: '-' after -F\n", GProgname);
- RETURN(usage());
- }
- else if (FileOpt[j] == ' ') while(FileOpt[j] == ' ') j++;
- }
- while(FileOpt[j] == ' ') j++;
-
- fileopt_length = strlen(FileOpt);
- strncpy(index_argv[index_argc],FileOpt+j,fileopt_length-j);
- index_argv[index_argc][fileopt_length-j] = '\0';
- index_argc++;
- my_free(FileOpt, MAXFILEOPT);
- FileOpt = NULL;
-
- #if BG_DEBUG
- fprintf(debug, "pattern to check with -F = %s\n",index_argv[index_argc-1]);
- #endif /*BG_DEBUG*/
- #if DEBUG
- fprintf(stderr, "-F : ");
- for (jj=0; jj < index_argc; jj++)
- fprintf(stderr, " %s ",index_argv[jj]);
- fprintf(stderr, "\n");
- #endif /*DEBUG*/
- fflush(stdout);
- get_filenames(src_index_set, index_argc, index_argv, dummylen, dummypat, OneFilePerBlock);
-
- /* Assume #files per partitions is appx constant */
- if (OneFilePerBlock) num_blocks = GNumfiles;
- else num_blocks = GNumfiles * GNumpartitions / p_table[GNumpartitions - 1];
- if (Only_first) { /* search the index only */
- fprintf(stderr, "There are matches to %d out of %d %s\n", num_blocks, (OneFilePerBlock > 0) ? OneFilePerBlock : GNumpartitions, (OneFilePerBlock > 0) ? "files" : "blocks");
- if (OneFilePerBlock && (num_blocks > 0)) {
- char cc[8];
- cc[0] = 'y';
- #if !ISSERVER
- if (!GNOPROMPT) {
- fprintf(stderr, "Do you want to see the file names? (y/n)");
- fgets(cc, 4, stdin);
- }
- #endif /*!ISSERVER*/
- if (cc[0] == 'y') {
- for (jjj=0; jjj<GNumfiles; jjj++)
- printf("%s\n", GTextfiles[jjj]);
- }
- }
- RETURN(0);
- }
- if (OneFilePerBlock) searchpercent = GNumfiles * 100 / OneFilePerBlock;
- else searchpercent = GNumfiles * 100 / p_table[GNumpartitions - 1];
- #if BG_DEBUG
- fprintf(debug, "searchpercent = %d, num_files = %d\n", searchpercent, p_table[GNumpartitions - 1]);
- #endif /*BG_DEBUG*/
- #if !ISSERVER
- if (!GNOPROMPT && (searchpercent > MAX_SEARCH_PERCENT)) {
- char cc[8];
- cc[0] = 'y';
- fprintf(stderr, "Your query may search about %d%% of the total space! Continue? (y/n)", searchpercent);
- fgets(cc, 4, stdin);
- if (cc[0] != 'y') RETURN(0);
- if (ByteLevelIndex && (searchpercent > DEF_MAX_INDEX_PERCENT)) NOBYTELEVEL = 1;
- }
- #endif /*!ISSERVER*/
- }
-
- /* Replace -B by the number of errors if best-match */
- if (GBESTMATCH && (my_B_index >= 0)) {
- sprintf(&agrep_argv[my_B_index][1], "%d", bestmatcherrors);
- #if BG_DEBUG
- fprintf(debug, "Changing -B to -%d\n", bestmatcherrors);
- #endif /*BG_DEBUG*/
- }
- agrep_argv[my_M_index][1] = 'Z';
- agrep_argv[my_P_index][1] = 'Z';
- if (!ComplexBoolean && ((long)GParse & AND_EXP) && (my_l_index != -1) && !WHOLEFILESCOPE) agrep_argv[my_l_index][1] = 'l';
-
- if (GNumfiles <= 0) RETURN(0);
- if (glimpse_clientdied) RETURN(0);
- /* must reinitialize since the above agrep calls for index-search ruined the real options: it is required EVEN IF ByteLevelIndex */
- AM = fileagrep_init(agrep_argc, agrep_argv, MAXPAT, APattern);
- /* do acutal search with postfiltering if structured query */
- if (WHOLEFILESCOPE <= 0) {
- if (!UseFilters) {
- if (!ByteLevelIndex || NOBYTELEVEL) {
- fileagrep_search(AM, APattern, GNumfiles, GTextfiles, 0, stdout);
- }
- else {
- for (i=0; i<GNumfiles; i++) {
- SetCurrentFileName = 1;
- strcpy(CurrentFileName, GTextfiles[i]);
- if (stat(CurrentFileName, &file_stat_buf) == -1) continue;
- if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
- /* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
- free_list(&src_offset_table[GFileIndex[i]]);
- first_search = 1;
- if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) gnum_of_matched += ret;
- }
- else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GFileIndex[i], src_offset_table, stdout)) > 0) gnum_of_matched += ret;
- SetCurrentFileName = 0;
- if (GLIMITOUTPUT > 0) {
- if (GLIMITOUTPUT <= gnum_of_matched) break;
- LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
- }
- if ((ret < 0) && REGEX && (WORDBOUND || DELIMITER)) break;
- if (glimpse_clientdied) break;
- fflush(stdout);
- }
- }
- }
- else {
- sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());
- for (i=0; i<GNumfiles; i++) {
- if (apply_filter(GTextfiles[i], outname[0]) == 1) {
- SetCurrentFileName = 1;
- strcpy(CurrentFileName, GTextfiles[i]);
- if (stat(CurrentFileName, &file_stat_buf) == -1) continue;
- if (!ByteLevelIndex || NOBYTELEVEL || (file_stat_buf.st_mtime > index_stat_buf.st_mtime)) {
- first_search = 1;
- if ((ret = fileagrep_search(AM, APattern, 1, outname, 0, stdout)) > 0) gnum_of_matched += ret;
- }
- else {
- if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
- /* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
- free_list(&src_offset_table[GFileIndex[i]]);
- first_search = 1;
- if ((ret = fileagrep_search(AM, APattern, 1, outname, 0, stdout)) > 0) gnum_of_matched += ret;
- }
- else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, outname[0], GFileIndex[i], src_offset_table, stdout)) > 0) gnum_of_matched += ret;
- }
- unlink(outname[0]);
- SetCurrentFileName = 0;
- }
- else {
- if (!ByteLevelIndex || NOBYTELEVEL) {
- first_search = 1;
- if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) gnum_of_matched += ret;
- }
- else {
- SetCurrentFileName = 1;
- strcpy(CurrentFileName, GTextfiles[i]);
- if (stat(CurrentFileName, &file_stat_buf) == -1) continue;
- if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
- /* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
- free_list(&src_offset_table[GFileIndex[i]]);
- first_search = 1;
- if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) gnum_of_matched += ret;
- }
- else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GFileIndex[i], src_offset_table, stdout)) > 0) gnum_of_matched += ret;
- SetCurrentFileName = 0;
- }
- }
- if (GLIMITOUTPUT > 0) {
- if (GLIMITOUTPUT <= gnum_of_matched) break;
- LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
- }
- if ((ret < 0) && REGEX && (WORDBOUND || DELIMITER)) break;
- if (glimpse_clientdied) break;
- fflush(stdout);
- }
- }
- }
- else { /* Filters do not apply here */
- FILE *tmpfp = NULL; /* to store structured query-search output */
- int OLDLIMITOUTPUT; /* don't use LIMITOUTPUT for search: only for filtering=identify_region(): agrep NEVER changes LIMITOUTPUT */
-
- for (i=0; i<GNumfiles; i++) {
- OLDLIMITOUTPUT = LIMITOUTPUT;
- LIMITOUTPUT = 0;
- if ((tmpfp = fopen(tempfile, "w")) == NULL) {
- fprintf(stderr, "%s: cannot open for writing: %s, errno=%d\n", GProgname, tempfile, errno);
- RETURN(usage());
- }
- SetCurrentFileName = 1;
- strcpy(CurrentFileName, GTextfiles[i]);
- if (!ByteLevelIndex || NOBYTELEVEL) {
- first_search = 1;
- ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
- }
- else {
- if (stat(CurrentFileName, &file_stat_buf) == -1) {
- fclose(tmpfp);
- continue;
- }
- if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
- /* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
- free_list(&src_offset_table[GFileIndex[i]]);
- first_search = 1;
- ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
- }
- else ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GFileIndex[i], src_offset_table, tmpfp);
- }
- SetCurrentFileName = 0;
- fflush(tmpfp);
- fclose(tmpfp);
- tmpfp = NULL;
- if ((ret < 0) && REGEX && (WORDBOUND || DELIMITER)) break;
- #if DEBUG
- printf("done search\n");
- fflush(stdout);
- #endif /*DEBUG*/
- LIMITOUTPUT = OLDLIMITOUTPUT;
- ret = filter_output(GTextfiles[i], tempfile, GParse, GD_pattern, GD_length, GOUTTAIL, nullfp, StructuredIndex);
- gnum_of_matched += (ret > 0) ? ret : 0;
- if (GLIMITOUTPUT > 0) {
- if (GLIMITOUTPUT <= gnum_of_matched) break;
- LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
- }
- if (glimpse_clientdied) break;
- fflush(stdout);
- }
- }
-
- RETURN(0);
- }
- else { /* argc > 0: simply call agrep */
- #if DEBUG
- for (i=0; i<agrep_argc; i++)
- printf("agrep_argv[%d] = %s\n", i, agrep_argv[i]);
- #endif /*DEBUG*/
- i = fileagrep(oldargc, oldargv, 0, stdout);
- RETURN(i);
- }
- }
- /* end of process_query() */
-
- /*
- * Simple function to remove the non-existent files from the set of
- * files passed onto agrep for search. These are the files which got
- * DELETED after the index was built (but a fresh index was NOT built).
- * Redundant since agrep opens them anyway and stat is as bad as open.
- */
- int
- purge_filenames(filenames, num)
- CHAR **filenames;
- int num;
- {
- struct stat buf;
- int i, j;
- int newnum = num;
- int ret;
-
- for (i=0; i<newnum; i++) {
- if (-1 == (ret = stat(filenames[i], &buf))) {
- #if BG_DEBUG
- fprintf(debug, "stat on %s = %d\n", filenames[i], ret);
- #endif /*BG_DEBUG*/
- my_free(filenames[i], 0);
- for (j=i; j<newnum-1; j++)
- filenames[j] = filenames[j+1];
- filenames[j] = NULL;
- newnum --;
- i--; /* offset the increment: start from current */
- }
- }
-
- #if BG_DEBUG
- fprintf(debug, "Old numfiles=%d\tNew numfiles=%d\n", num, newnum);
- for (i=0; i<newnum; i++)
- fprintf(debug, "file %d = %s\n", i, filenames[i]);
- #endif /*BG_DEBUG*/
- return newnum;
- }
-
- CHAR filter_buf[BLOCKSIZE + MAXPAT*2]; /* scratch buffer filter_output() reads the stored search output into, at most BLOCKSIZE bytes per pass; the MAXPAT*2 slack is presumably headroom for the sentinel byte written just past a record -- TODO confirm */
-
- /* returns #of bytes stripped off */
- int getbyteoff(buf, pbyteoff)
- CHAR *buf;
- int *pbyteoff;
- {
- CHAR temp[32];
- int i = 0;
-
- while (isdigit(*buf) && (i<32)) temp[i++] = *buf++;
- if ((*buf != '=') || (*(buf + 1) != ' ')) return -1;
- temp[i] = '\0';
- *pbyteoff = atoi(temp);
- return i+2;
- }
-
- /*
- * Post-filter the search output of one file against the query's terminals.
- *
- * infile: name of the file that was searched; it is handed to
- * region_create() when StructuredIndex is set and is what gets printed
- * when GFILENAMEONLY is set.
- * outfile: path of the file holding the raw search output; despite its
- * name it is opened here for READING.
- * GParse: handed to eval_tree() when ComplexBoolean is set; otherwise its
- * value is cast to long and tested against AND_EXP.
- * GD_pattern, GD_length, GOUTTAIL: record-delimiter description passed to
- * forward_delimiter() and backward_delimiter().
- * NOTE(review): GD_length is declared as an array here but is added to
- * (long)current_end as if it were an integer -- confirm the intended type.
- * nullfp: output stream given to memagrep_search(), whose output is not
- * otherwise used here.
- * num_attr: when > 0, only records whose region_identify() attribute lies
- * in [0, num_attr) are examined; when <= 0 the attribute is taken as 0.
- *
- * Processing, per record of the search output:
- * -- get the matched line/record-s using GD_pattern, GD_length and GOUTTAIL
- * -- find the "<byte offset>= " prefix with getbyteoff(); a record without
- * one is written out unchanged
- * -- call region_identify() with the record's byte offset
- * -- run memagrep_search() for each not-yet-matched terminal whose attribute
- * is 0 or equals the record's attribute, and mark it in matched_terminals
- * -- records are held in a temporary store file until the boolean
- * expression is satisfied; from then on the store is copied to stdout
- * and further records go to stdout directly
- *
- * Returns 0 if outfile, the region table or the store file cannot be set up.
- * Otherwise returns numprinted, or, when GFILENAMEONLY is set, 1 if
- * numprinted > 0 and 0 if not.
- */
- int
- filter_output(infile, outfile, GParse, GD_pattern, GD_length, GOUTTAIL, nullfp, num_attr)
- char *infile;
- char *outfile;
- ParseTree *GParse;
- CHAR GD_pattern[];
- int GD_length[];
- int GOUTTAIL;
- FILE *nullfp;
- int num_attr;
- {
- FILE *outfp; /* reads the raw search output stored in outfile */
- FILE *displayfp = NULL; /* where accepted records are written: the store file while output is held back, stdout otherwise */
- FILE *storefp = NULL; /* the temporary store file; NULL when nothing is being held back */
- int num_read;
- int residue = 0; /* bytes after the last complete record, carried over to the next fread */
- int byteoff;
- int attribute;
- int i, ii; /* i is forloop index, ii is booleaneval index */
- CHAR *final_end;
- CHAR *current_end;
- CHAR *current_begin;
- CHAR *previous_begin;
- int skiplen; /* length of the "<offset>= " prefix as reported by getbyteoff(); NOTE(review): has no initializer yet is tested after the scan loop even when no digit was seen -- confirm this is harmless */
- char s[MAX_LINE_LEN];
- CHAR c1, c2;
- int printed, numprinted = 0; /* returns number of printed records if successful in matching the pattern in the object infile */
- char *attrname;
- int success = 0; /* do we print the stored output or not */
- 
- #if BG_DEBUG
- printf("INFILE=%s\n", infile);
- printf("OUTFILE\n");
- sprintf(s, "cat %s\n", outfile);
- system(s);
- #endif /*BG_DEBUG*/
- if ((outfp = fopen(outfile, "r")) == NULL) return 0; /* nothing to filter if the search output cannot be read */
- if (StructuredIndex && (-1 == region_create(infile))) {
- fclose(outfp);
- return 0;
- }
- if (ComplexBoolean || ((long)GParse & AND_EXP)) {
- sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
- if ((displayfp = storefp = fopen(s, "w")) == NULL) {
- if (StructuredIndex) region_destroy();
- fclose(outfp);
- return 0;
- }
- }
- else {
- displayfp = stdout;
- /* cannot come to filter_output in this case! */
- }
- memset(matched_terminals, '\0', num_terminals); /* no terminal has matched yet for this file */
- 
- while ( ( (num_read = fread(filter_buf + residue, 1, BLOCKSIZE - residue, outfp)) > 0) || (residue > 0)) { /* refill after the carried-over residue; one extra pass drains the residue once fread returns nothing */
- if (num_read <= 0) {
- final_end = filter_buf + residue;
- num_read = residue;
- residue = 0;
- }
- else {
- num_read += residue;
- final_end = (CHAR *)backward_delimiter(filter_buf + num_read, filter_buf, GD_pattern, GD_length, GOUTTAIL);
- residue = filter_buf + num_read - final_end; /* whatever follows final_end is kept for the next pass */
- }
- #if DEBUG
- fprintf(stderr, "filter_buf=%x final_end=%x residue=%x last_chars=%c%c%c num_read=%x\n",
- filter_buf, final_end, residue, *(final_end-2), *(final_end-1), *(final_end), num_read);
- #endif /*DEBUG*/
- 
- current_begin = previous_begin = filter_buf;
- current_end = (CHAR *)forward_delimiter(filter_buf, filter_buf + num_read, GD_pattern, GD_length, GOUTTAIL); /* skip over prefixes like filename */
- if (!GOUTTAIL) current_end = (CHAR *)forward_delimiter((long)current_end + GD_length, final_end, GD_pattern, GD_length, GOUTTAIL);
- 
- while (current_end <= final_end) {
- previous_begin = current_begin;
- /* look for %d= */
- byteoff = -1;
- while (current_begin < current_end) {
- if (isdigit(*current_begin)) {
- skiplen = getbyteoff(current_begin, &byteoff);
- #if BG_DEBUG
- fprintf(debug, "byteoff=%d skiplen=%d\n", byteoff, skiplen);
- #endif /*BG_DEBUG*/
- if ((skiplen < 0) || (byteoff < 0)) {
- current_begin ++;
- continue;
- }
- else break;
- }
- else current_begin ++;
- }
- #if DEBUG
- printf("current_begin=%x current_end=%x final_end=%x residue=%x num_read=%x\n", current_begin, current_end, final_end, residue, num_read);
- #endif /*DEBUG*/
- 
- #if DEBUG
- printf("byteoff=%d skiplen=%d\n", byteoff, skiplen);
- #endif /*DEBUG*/
- if ((skiplen < 0) || (byteoff < 0)) { /* output the whole line as it is: there is nothing to strip (e.g., -l) */
- fwrite(previous_begin, 1, current_end-previous_begin, displayfp);
- numprinted ++;
- }
- else if ( (num_attr <= 0) || (((attribute = region_identify(byteoff, 0)) < num_attr) && (attribute >= 0)) ) {
- /* prefix is from previous_begin to current_begin. Skip skiplen from current_begin. Rest until current_end is valid output */
- if (num_attr <= 0) attribute = 0;
- #if BG_DEBUG
- fprintf(debug, "region@%d=%d\n", byteoff, attribute);
- #endif /*BG_DEBUG*/
- c1 = *(current_begin + skiplen - 1); /* saved so the sentinel newline written before the search can be undone */
- c2 = *(current_end + 1); /* likewise for the byte just past the record */
- printed = 0;
- 
- if (!success) {
- if (ComplexBoolean) {
- success = eval_tree(GParse, matched_terminals);
- }
- else {
- if ((long)GParse & AND_EXP) {
- success = 0;
- for (ii=0; ii<num_terminals; ii++) {
- if (!matched_terminals[ii]) break;
- }
- if (ii >= num_terminals) success = 1;
- }
- else {
- success = 0;
- /* cannot come to filter_output in this case! */
- }
- }
- }
- 
- /*
- * Search for the value in the terminals array corr. to the matched attribute in the buffer and set matched_terminals.
- * Cannot skip evaluating midway once expression is satisfied since I have to store the output in the temp file anyway.
- */
- if (!success) for (i=0; i<num_terminals; i++) {
- if (matched_terminals[i]) { /* success is still 0 otherwise wouldn't have (re)entered the for loop */
- if (!((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) && !printed) { /* see if it was useful later */
- fwrite(previous_begin, 1, current_begin - previous_begin, displayfp);
- if (PRINTATTR) fprintf(displayfp, "%s# ",
- (attrname = attr_id_to_name(attribute)) == NULL ? "(null)" : attrname);
- fwrite(current_begin + skiplen, 1, current_end - current_begin - skiplen, displayfp);
- printed = 1;
- numprinted ++;
- }
- continue;
- }
- 
- if ((terminals[i].data.leaf.attribute == 0) || (terminals[i].data.leaf.attribute == attribute)) {
- *(current_begin + skiplen - 1) = '\n'; /* bracket the record text with newlines for the search; restored from c1/c2 on both outcomes */
- *(current_end + 1) = '\n';
- if (memagrep_search( strlen(terminals[i].data.leaf.value), terminals[i].data.leaf.value,
- current_end - current_begin - skiplen + 1, current_begin + skiplen - 1,
- 0, nullfp) > 0) {
- #if 0
- *(current_end + 1) = '\0';
- printf("--> search succeeded for %s in %s\n", terminals[i].data.leaf.value, previous_begin);
- #endif /*0*/
- *(current_begin + skiplen - 1) = c1;
- *(current_end + 1) = c2;
- matched_terminals[i] = 1;
- 
- if (!success) {
- if (ComplexBoolean) {
- success = eval_tree(GParse, matched_terminals);
- }
- else {
- if ((long)GParse & AND_EXP) {
- success = 0;
- for (ii=0; ii<num_terminals; ii++) {
- if (!matched_terminals[ii]) break;
- }
- if (ii >= num_terminals) success = 1;
- }
- else {
- success = 0;
- /* cannot come to filter_output in this case! */
- }
- }
- }
- 
- if (success) break; /* the record itself is written by the success branch below */
- 
- if (!((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) && !printed) { /* see if it was useful later */
- fwrite(previous_begin, 1, current_begin - previous_begin, displayfp);
- if (PRINTATTR) fprintf(displayfp, "%s# ",
- (attrname = attr_id_to_name(attribute)) == NULL ? "(null)" : attrname);
- fwrite(current_begin + skiplen, 1, current_end - current_begin - skiplen, displayfp);
- printed = 1;
- numprinted ++;
- }
- }
- else {
- #if 0
- *(current_end + 1) = '\0';
- printf("--> search failed for %s in %s\n", terminals[i].data.leaf.value, previous_begin);
- #endif /*0*/
- *(current_begin + skiplen - 1) = c1;
- *(current_end + 1) = c2;
- }
- }
- }
- 
- if (success) { /* dump out everything in store, then just output everything without searching */
- if (GFILENAMEONLY) { /* all other output options are useless since they all deal with the MATCHED line */
- fprintf(stdout, "%s\n", infile);
- if (storefp != NULL) fclose(storefp); /* don't bother to flush! */
- storefp = NULL;
- goto unlink_and_quit;
- }
- else if (storefp != NULL) {
- fflush(storefp);
- fclose(storefp);
- sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
- if ((storefp = fopen(s, "r")) != NULL) {
- while (fgets(s, MAX_LINE_LEN, storefp) != NULL) fputs(s, stdout);
- fclose(storefp);
- }
- storefp = NULL;
- displayfp = stdout; /* from here on records go straight to stdout */
- }
- 
- if (!((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) && !printed) {
- fwrite(previous_begin, 1, current_begin - previous_begin, displayfp);
- if (PRINTATTR) fprintf(displayfp, "%s# ",
- (attrname = attr_id_to_name(attribute)) == NULL ? "(null)" : attrname);
- fwrite(current_begin + skiplen, 1, current_end - current_begin - skiplen, displayfp);
- printed = 1;
- numprinted ++;
- }
- }
- }
- if (glimpse_clientdied) break;
- if (current_end >= final_end) break;
- current_begin = current_end;
- if (!GOUTTAIL) current_end = (CHAR *)forward_delimiter((long)current_end + GD_length, final_end, GD_pattern, GD_length, GOUTTAIL);
- else current_end = (CHAR *)forward_delimiter(current_end, final_end, GD_pattern, GD_length, GOUTTAIL);
- }
- if (residue > 0) memcpy(filter_buf, final_end, residue); /* move the incomplete tail to the front for the next pass; NOTE(review): the two ranges overlap if residue exceeds final_end - filter_buf, where memmove would be the safe call -- confirm */
- }
- 
- if (success) goto unlink_and_quit; /* the store was already copied to stdout inside the loop */
- 
- if (ComplexBoolean) {
- success = eval_tree(GParse, matched_terminals);
- }
- else {
- if ((long)GParse & AND_EXP) {
- success = 0;
- for (ii=0; ii<num_terminals; ii++) {
- if (!matched_terminals[ii]) break;
- }
- if (ii >= num_terminals) success = 1;
- }
- else {
- success = 0;
- /* cannot come to filter_output in this case! */
- }
- }
- 
- /* Print the temporary output onto stdout if search was successful; unlink the temporary file */
- if (success) {
- if (GFILENAMEONLY) { /* all other output options are useless since they all deal with the MATCHED line */
- fprintf(stdout, "%s\n", infile);
- if (storefp != NULL) fclose(storefp); /* don't bother to flush! */
- storefp = NULL;
- }
- else if (storefp != NULL) {
- fflush(storefp);
- fclose(storefp);
- #if DEBUG
- printf("STOREOUTPUT\n");
- sprintf(s, "cat %s/.glimpse_storeoutput.%d\n", TEMP_DIR, getpid());
- system(s);
- #endif /*DEBUG*/
- sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
- if ((storefp = fopen(s, "r")) != NULL) {
- while (fgets(s, MAX_LINE_LEN, storefp) != NULL) fputs(s, stdout);
- fclose(storefp);
- }
- storefp = NULL;
- }
- }
- else {
- if (storefp != NULL) fclose(storefp); /* else don't bother to flush */
- }
- 
- unlink_and_quit:
- sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
- unlink(s);
- 
- if (StructuredIndex) region_destroy();
- fclose(outfp);
- 
- if (GFILENAMEONLY) { /* with GFILENAMEONLY the result is only 1 or 0, not a record count */
- if (numprinted > 0) return 1;
- else return 0;
- }
- else return numprinted;
- }
-
- usage()
- {
- fprintf(stderr, "\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- fprintf(stderr, "usage: %s [-#abcdehiklnprstwxyBCDGIMSVW] [-F pat] [-H dir] [-J host] [-K port] [-L num] [-R lim] [-T dir] pattern [files]", GProgname);
- fprintf(stderr, "\n");
- fprintf(stderr, "summary of frequently used options:\n");
- fprintf(stderr, "(For a more detailed listing see 'man glimpse'.)\n");
- fprintf(stderr, "-#: find matches with at most # errors\n");
- fprintf(stderr, "-c: output the number of matched records\n");
- fprintf(stderr, "-d: define record delimiter\n");
- fprintf(stderr, "-h: do not output file names\n");
- fprintf(stderr, "-i: case-insensitive search, e.g., 'a' = 'A'\n");
- fprintf(stderr, "-l: output the names of files that contain a match\n");
- fprintf(stderr, "-n: output record prefixed by record number\n");
- /* fprintf(stderr, "-v: output those records that have no matches\n"); */
- fprintf(stderr, "-w: pattern has to match as a word, e.g., 'win' will not match 'wind'\n");
- fprintf(stderr, "-B: best match mode. find the closest matches to the pattern\n");
- fprintf(stderr, "-F 'pat': 'pat' is used to match against file names\n");
- fprintf(stderr, "-G: output the (whole) files that contain a match\n");
- fprintf(stderr, "-H 'dir': the glimpse index is located in directory 'dir'\n");
- fprintf(stderr, "-L 'num': limit the output to 'num' records only\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
-
- return -1; /* useful if we make glimpse into a library */
- }
-
- usageS()
- {
- fprintf(stderr, "\nThis is glimpse server version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
- fprintf(stderr, "usage: %s [-H dir] [-J host] [-K port]", GProgname);
- fprintf(stderr, "\n");
- fprintf(stderr, "-H 'dir': the glimpse index is located in directory 'dir'\n");
- fprintf(stderr, "-J 'host': the host name (string) clients must use / server runs on \n");
- fprintf(stderr, "-K 'port': the port (short integer) clients must use / server runs on \n");
- fprintf(stderr, "\n");
- fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
-
- return -1; /* useful if we make glimpse into a library */
- }
-
- #if CLIENTSERVER
/*
 * do_select() - based on select_loop() from the Harvest Broker.
 *	-- Courtesy: Darren Hardy, hardy@cs.colorado.edu
 *
 * Wait for up to sec seconds until sock has data to read.
 *
 * Returns
 *	 1 if sock is readable,
 *	 0 if the wait timed out, was interrupted by a signal (EINTR),
 *	   or if sock or sec is negative,
 *	-1 if select() failed for any other reason (reported via perror),
 *	   or if sock is too large to be stored in an fd_set.
 */
int do_select(sock, sec)
	int sock;	/* the socket to wait for */
	int sec;	/* the number of seconds to wait */
{
	struct timeval to;
	fd_set qready;
	int nready;

	if (sock < 0 || sec < 0)
		return 0;

	/* FD_SET on a descriptor >= FD_SETSIZE writes outside the set */
	if (sock >= FD_SETSIZE) {
		errno = EINVAL;
		perror("select");
		return -1;
	}

	FD_ZERO(&qready);
	FD_SET(sock, &qready);
	to.tv_sec = sec;
	to.tv_usec = 0;

	nready = select(sock + 1, &qready, NULL, NULL, &to);
	if (nready < 0) {
		if (errno == EINTR)
			return 0;
		perror("select");
		return -1;
	}
	if (nready == 0)
		return 0;

	/* If there's someone waiting to get it, let them through */
	return (FD_ISSET(sock, &qready) ? 1 : 0);
}
- #endif /* CLIENTSERVER */
-